diff --git "a/profile_trace/iteration_121856/rank2_trace.json" "b/profile_trace/iteration_121856/rank2_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_121856/rank2_trace.json" @@ -0,0 +1,83427 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 2, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "D4CFB559C28E4C209DAFA782130C33FA", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016046209.475, "dur": 163.419, + "args": { + "External id": 2940929,"Record function id": 0, "Sequence number": 29294790, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016046236.350, "dur": 123.326, + "args": { + "External id": 2940930,"Sequence number": 29294790, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 1336755, "tid": 1381158, "ts": 1555016046236.350, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336755, "tid": 1381158, + "ts": 1555016046248.728, "dur": 108.192, + "args": { + "External id": 2940931,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016046387.746, "dur": 308.170, + "args": { + "External id": 2940932,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016046479.690, "dur": 127.925, + "args": { + "External id": 2940933,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.27", "pid": 1336755, "tid": 1381158, + "ts": 1555016046528.008, "dur": 68.007, + "args": { + "External id": 2940934,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016046612.010, "dur": 1.870, + "args": { + "External id": 2940935,"Sequence number": 29294789, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 1336755, "tid": 1381158, "ts": 1555016046612.010, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016046619.801, "dur": 68.379, + "args": { + "External id": 2940936,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016046631.117, "dur": 56.551, + "args": { + "External id": 2940937,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016046641.907, "dur": 4.205, + "args": { + "External id": 2940938,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016046706.053, "dur": 36527.607, + "args": { + "External id": 2940939,"Record function id": 0, "Sequence number": 29294787, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016046707.842, "dur": 36510.829, + "args": { + "External id": 2940940,"Sequence number": 29294787, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 1336755, "tid": 1381158, "ts": 1555016046707.842, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016046768.064, "dur": 3.955, + "args": { + "External id": 2940941,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016046778.590, "dur": 36287.089, + "args": { + "External id": 2940942,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016046781.016, "dur": 36284.318, + "args": { + "External id": 2940943,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016046787.595, "dur": 12.438, + "args": { + "External id": 2940944,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016046802.452, "dur": 36260.898, + "args": { + "External id": 2940945,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336755, "tid": 1381158, + "ts": 1555016083071.052, "dur": 0.888, + "args": { + "External id": 2940946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083074.880, "dur": 3.657, + "args": { + "External id": 2940947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083076.849, "dur": 1.480, + "args": { + "External id": 2940948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1381158, + "ts": 1555016083086.117, "dur": 36.400, + "args": { + "External id": 2940949,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336755, "tid": 1381158, + "ts": 1555016083133.121, "dur": 73.109, + "args": { + "External id": 2940950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1381158, + "ts": 1555016083135.086, "dur": 70.904, + "args": { + "External id": 2940951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1381158, + "ts": 1555016083137.551, "dur": 67.919, + "args": { + "External id": 2940952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083244.285, "dur": 23.380, + "args": { + "External id": 2940953,"Record function id": 0, "Sequence number": 29294786, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083246.239, "dur": 17.946, + "args": { + "External id": 2940954,"Sequence number": 29294786, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 1336755, "tid": 1381158, "ts": 1555016083246.239, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016083250.810, "dur": 13.121, + "args": { + "External id": 2940955,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016083256.170, "dur": 7.579, + "args": { + "External id": 2940956,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083271.660, "dur": 126.431, + "args": { + "External id": 2940957,"Record function id": 0, "Sequence number": 29294785, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083272.819, "dur": 113.548, + "args": { + "External id": 2940958,"Sequence number": 29294785, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 1336755, "tid": 1381158, "ts": 1555016083272.819, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016083276.811, "dur": 109.018, + "args": { + "External id": 2940959,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016083283.911, "dur": 44.904, + "args": { + "External id": 2940960,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016083288.828, "dur": 7.668, + "args": { + "External id": 2940961,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083298.923, "dur": 29.577, + "args": { + "External id": 2940962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083305.136, "dur": 22.833, + "args": { + "External id": 2940963,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016083331.721, "dur": 4.980, + "args": { + "External id": 2940964,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016083334.794, "dur": 1.601, + "args": { + "External id": 2940965,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083337.712, "dur": 47.106, + "args": { + "External id": 2940966,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083402.762, "dur": 75.875, + "args": { + "External id": 2940967,"Record function id": 0, "Sequence number": 29294784, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083404.090, "dur": 69.612, + "args": { + "External id": 2940968,"Sequence number": 29294784, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 1336755, "tid": 1381158, "ts": 1555016083404.090, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016083408.066, "dur": 65.291, + "args": { + "External id": 2940969,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016083414.130, "dur": 23.700, + "args": { + "External id": 2940970,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016083415.290, "dur": 3.912, + "args": { + "External id": 2940971,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083419.796, "dur": 17.771, + "args": { + "External id": 2940972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083423.242, "dur": 13.981, + "args": { + "External id": 2940973,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016083442.406, "dur": 6.895, + "args": { + "External id": 2940974,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016083447.130, "dur": 1.367, + "args": { + "External id": 2940975,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083450.174, "dur": 22.632, + "args": { + "External id": 2940976,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083482.663, "dur": 144.694, + "args": { + "External id": 2940977,"Record function id": 0, "Sequence number": 29294783, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083484.044, "dur": 138.107, + "args": { + "External id": 2940978,"Sequence number": 29294783, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 1336755, "tid": 1381158, "ts": 1555016083484.044, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016083488.268, "dur": 133.605, + "args": { + "External id": 2940979,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016083491.868, "dur": 17.644, + "args": { + "External id": 2940980,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016083492.522, "dur": 2.843, + "args": { + "External id": 2940981,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083496.101, "dur": 13.162, + "args": { + "External id": 2940982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083497.001, "dur": 11.853, + "args": { + "External id": 2940983,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016083513.018, "dur": 5.265, + "args": { + "External id": 2940984,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016083514.250, "dur": 3.844, + "args": { + "External id": 2940985,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083518.918, "dur": 102.256, + "args": { + "External id": 2940986,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083633.727, "dur": 111.110, + "args": { + "External id": 2940987,"Record function id": 0, "Sequence number": 29294782, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083634.656, "dur": 106.188, + "args": { + "External id": 2940988,"Sequence number": 29294782, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 1336755, "tid": 1381158, "ts": 1555016083634.656, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016083636.902, "dur": 103.675, + "args": { + "External id": 2940989,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016083640.242, "dur": 16.291, + "args": { + "External id": 2940990,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016083640.897, "dur": 2.499, + "args": { + "External id": 2940991,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083644.002, "dur": 12.293, + "args": { + "External id": 2940992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083645.121, "dur": 10.874, + "args": { + "External id": 2940993,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016083659.764, "dur": 4.575, + "args": { + "External id": 2940994,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016083663.377, "dur": 0.831, + "args": { + "External id": 2940995,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083664.897, "dur": 74.865, + "args": { + "External id": 2940996,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083749.113, "dur": 41.894, + "args": { + "External id": 2940997,"Record function id": 0, "Sequence number": 29294781, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016083750.881, "dur": 1.285, + "args": { + "External id": 2940998,"Sequence number": 29294781, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 1336755, "tid": 1381158, "ts": 1555016083750.881, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016083754.732, "dur": 30.862, + "args": { + "External id": 2940999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016083756.466, "dur": 28.615, + "args": { + "External id": 2941000,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016083766.396, "dur": 0.745, + "args": { + "External id": 2941001,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016083796.296, "dur": 2238.737, + "args": { + "External id": 2941002,"Record function id": 0, "Sequence number": 29294779, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016083797.778, "dur": 2170.715, + "args": { + "External id": 2941003,"Sequence number": 29294779, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 1336755, "tid": 1381158, "ts": 1555016083797.778, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016083837.554, "dur": 2.733, + "args": { + "External id": 2941004,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016083842.559, "dur": 2038.545, + "args": { + "External id": 2941005,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016083844.202, "dur": 2036.659, + "args": { + "External id": 2941006,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016083846.739, "dur": 7.036, + "args": { + "External id": 2941007,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016083857.182, "dur": 2022.642, + "args": { + "External id": 2941008,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336755, "tid": 1381158, + "ts": 1555016085884.687, "dur": 0.539, + "args": { + "External id": 2941009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016085886.635, "dur": 2.421, + "args": { + "External id": 2941010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016085887.723, "dur": 1.201, + "args": { + "External id": 2941011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1381158, + "ts": 1555016085893.119, "dur": 22.798, + "args": { + "External id": 2941012,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336755, "tid": 1381158, + "ts": 1555016085921.214, "dur": 39.301, + "args": { + "External id": 2941013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1381158, + "ts": 1555016085922.147, "dur": 38.124, + "args": { + "External id": 2941014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1381158, + "ts": 1555016085923.241, "dur": 36.781, + "args": { + "External id": 2941015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016085980.126, "dur": 49.005, + "args": { + "External id": 2941016,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086043.619, "dur": 18.369, + "args": { + "External id": 2941017,"Record function id": 0, "Sequence number": 29294778, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086047.703, "dur": 10.476, + "args": { + "External id": 2941018,"Sequence number": 29294778, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 1336755, "tid": 1381158, "ts": 1555016086047.703, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016086052.940, "dur": 5.029, + "args": { + "External id": 2941019,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016086054.127, "dur": 3.646, + "args": { + "External id": 2941020,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086065.243, "dur": 88.200, + "args": { + "External id": 2941021,"Record function id": 0, "Sequence number": 29294777, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086066.361, "dur": 68.571, + "args": { + "External id": 2941022,"Sequence number": 29294777, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 1336755, "tid": 1381158, "ts": 1555016086066.361, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016086068.553, "dur": 66.075, + "args": { + "External id": 2941023,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016086072.981, "dur": 24.711, + "args": { + "External id": 2941024,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016086074.537, "dur": 5.328, + "args": { + "External id": 2941025,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086080.664, "dur": 16.752, + "args": { + "External id": 2941026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086082.024, "dur": 14.994, + "args": { + "External id": 2941027,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016086098.919, "dur": 2.708, + "args": { + "External id": 2941028,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016086100.432, "dur": 0.923, + "args": { + "External id": 2941029,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086105.824, "dur": 28.031, + "args": { + "External id": 2941030,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086160.241, "dur": 67.410, + "args": { + "External id": 2941031,"Record function id": 0, "Sequence number": 29294776, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086161.592, "dur": 60.418, + "args": { + "External id": 2941032,"Sequence number": 29294776, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 1336755, "tid": 1381158, "ts": 1555016086161.592, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016086166.920, "dur": 54.804, + "args": { + "External id": 2941033,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016086168.760, "dur": 24.270, + "args": { + "External id": 2941034,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016086169.700, "dur": 3.166, + "args": { + "External id": 2941035,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086173.541, "dur": 19.224, + "args": { + "External id": 2941036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086174.405, "dur": 17.931, + "args": { + "External id": 2941037,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016086194.419, "dur": 9.445, + "args": { + "External id": 2941038,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016086202.237, "dur": 1.101, + "args": { + "External id": 2941039,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086204.547, "dur": 16.609, + "args": { + "External id": 2941040,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086231.248, "dur": 93.865, + "args": { + "External id": 2941041,"Record function id": 0, "Sequence number": 29294775, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086232.547, "dur": 88.583, + "args": { + "External id": 2941042,"Sequence number": 29294775, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 1336755, "tid": 1381158, "ts": 1555016086232.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016086234.035, "dur": 86.796, + "args": { + "External id": 2941043,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016086235.177, "dur": 18.827, + "args": { + "External id": 2941044,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016086236.028, "dur": 2.386, + "args": { + "External id": 2941045,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086239.007, "dur": 14.741, + "args": { + "External id": 2941046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086240.002, "dur": 13.440, + "args": { + "External id": 2941047,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016086254.940, "dur": 5.831, + "args": { + "External id": 2941048,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016086259.733, "dur": 0.808, + "args": { + "External id": 2941049,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086261.408, "dur": 58.784, + "args": { + "External id": 2941050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086330.957, "dur": 90.354, + "args": { + "External id": 2941051,"Record function id": 0, "Sequence number": 29294774, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086331.755, "dur": 70.990, + "args": { + "External id": 2941052,"Sequence number": 29294774, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 1336755, "tid": 1381158, "ts": 1555016086331.755, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016086335.297, "dur": 67.127, + "args": { + "External id": 2941053,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016086336.079, "dur": 15.518, + "args": { + "External id": 2941054,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016086336.848, "dur": 1.783, + "args": { + "External id": 2941055,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086339.547, "dur": 11.802, + "args": { + "External id": 2941056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086340.435, "dur": 10.614, + "args": { + "External id": 2941057,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016086352.298, "dur": 1.663, + "args": { + "External id": 2941058,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016086353.184, "dur": 0.632, + "args": { + "External id": 2941059,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086354.662, "dur": 47.111, + "args": { + "External id": 2941060,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086406.863, "dur": 13.224, + "args": { + "External id": 2941061,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086427.265, "dur": 37.954, + "args": { + "External id": 2941062,"Record function id": 0, "Sequence number": 29294773, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016086428.306, "dur": 1.241, + "args": { + "External id": 2941063,"Sequence number": 29294773, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 1336755, "tid": 1381158, "ts": 1555016086428.306, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016086432.129, "dur": 27.706, + "args": { + "External id": 2941064,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016086433.866, "dur": 25.468, + "args": { + "External id": 2941065,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016086442.616, "dur": 0.837, + "args": { + "External id": 2941066,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016086469.962, "dur": 3070.672, + "args": { + "External id": 2941067,"Record function id": 0, "Sequence number": 29294771, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016086473.445, "dur": 3037.847, + "args": { + "External id": 2941068,"Sequence number": 29294771, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 1336755, "tid": 1381158, "ts": 1555016086473.445, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016086504.253, "dur": 2.096, + "args": { + "External id": 2941069,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016086508.338, "dur": 2916.036, + "args": { + "External id": 2941070,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016086509.650, "dur": 2914.285, + "args": { + "External id": 2941071,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016086511.860, "dur": 6.221, + "args": { + "External id": 2941072,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016086519.001, "dur": 2903.842, + "args": { + "External id": 2941073,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336755, "tid": 1381158, + "ts": 1555016089427.176, "dur": 0.518, + "args": { + "External id": 2941074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089428.806, "dur": 4.401, + "args": { + "External id": 2941075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089432.108, "dur": 0.966, + "args": { + "External id": 2941076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1381158, + "ts": 1555016089436.904, "dur": 20.658, + "args": { + "External id": 2941077,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336755, "tid": 1381158, + "ts": 1555016089465.036, "dur": 39.349, + "args": { + "External id": 2941078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1381158, + "ts": 1555016089466.124, "dur": 38.099, + "args": { + "External id": 2941079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1381158, + "ts": 1555016089467.232, "dur": 36.694, + "args": { + "External id": 2941080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089520.222, "dur": 16.232, + "args": { + "External id": 2941081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089550.286, "dur": 11.551, + "args": { + "External id": 2941082,"Record function id": 0, "Sequence number": 29294770, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089551.933, "dur": 6.374, + "args": { + "External id": 2941083,"Sequence number": 29294770, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 1336755, "tid": 1381158, "ts": 1555016089551.933, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016089554.002, "dur": 4.097, + "args": { + "External id": 2941084,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016089555.149, "dur": 2.800, + "args": { + "External id": 2941085,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089565.153, "dur": 71.068, + "args": { + "External id": 2941086,"Record function id": 0, "Sequence number": 29294769, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089569.026, "dur": 61.197, + "args": { + "External id": 2941087,"Sequence number": 29294769, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 1336755, "tid": 1381158, "ts": 1555016089569.026, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016089573.941, "dur": 55.935, + "args": { + "External id": 2941088,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016089576.290, "dur": 20.361, + "args": { + "External id": 2941089,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016089577.485, "dur": 2.620, + "args": { + "External id": 2941090,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089580.775, "dur": 15.619, + "args": { + "External id": 2941091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089582.136, "dur": 13.776, + "args": { + "External id": 2941092,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016089597.644, "dur": 5.187, + "args": { + "External id": 2941093,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016089601.525, "dur": 1.082, + "args": { + "External id": 2941094,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089603.757, "dur": 25.339, + "args": { + "External id": 2941095,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089640.186, "dur": 55.799, + "args": { + "External id": 2941096,"Record function id": 0, "Sequence number": 29294768, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089641.433, "dur": 50.441, + "args": { + "External id": 2941097,"Sequence number": 29294768, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 1336755, "tid": 1381158, "ts": 1555016089641.433, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016089643.550, "dur": 47.928, + "args": { + "External id": 2941098,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016089647.591, "dur": 18.734, + "args": { + "External id": 2941099,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016089648.306, "dur": 2.290, + "args": { + "External id": 2941100,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089651.254, "dur": 14.828, + "args": { + "External id": 2941101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089652.067, "dur": 13.695, + "args": { + "External id": 2941102,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016089667.311, "dur": 8.695, + "args": { + "External id": 2941103,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016089672.243, "dur": 3.397, + "args": { + "External id": 2941104,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089676.658, "dur": 14.414, + "args": { + "External id": 2941105,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089699.465, "dur": 97.134, + "args": { + "External id": 2941106,"Record function id": 0, "Sequence number": 29294767, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089700.724, "dur": 91.994, + "args": { + "External id": 2941107,"Sequence number": 29294767, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 1336755, "tid": 1381158, "ts": 1555016089700.724, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016089702.620, "dur": 89.748, + "args": { + "External id": 2941108,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016089703.751, "dur": 21.774, + "args": { + "External id": 2941109,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016089707.342, "dur": 1.873, + "args": { + "External id": 2941110,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089710.031, "dur": 15.226, + "args": { + "External id": 2941111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089710.967, "dur": 13.958, + "args": { + "External id": 2941112,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016089726.315, "dur": 4.400, + "args": { + "External id": 2941113,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016089730.073, "dur": 0.491, + "args": { + "External id": 2941114,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089731.362, "dur": 60.237, + "args": { + "External id": 2941115,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089800.452, "dur": 95.321, + "args": { + "External id": 2941116,"Record function id": 0, "Sequence number": 29294766, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089801.784, "dur": 75.625, + "args": { + "External id": 2941117,"Sequence number": 29294766, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 1336755, "tid": 1381158, "ts": 1555016089801.784, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016089805.293, "dur": 71.869, + "args": { + "External id": 2941118,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016089805.965, "dur": 18.306, + "args": { + "External id": 2941119,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016089808.728, "dur": 1.746, + "args": { + "External id": 2941120,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089811.001, "dur": 13.013, + "args": { + "External id": 2941121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089811.845, "dur": 11.837, + "args": { + "External id": 2941122,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016089825.061, "dur": 3.799, + "args": { + "External id": 2941123,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016089828.207, "dur": 0.474, + "args": { + "External id": 2941124,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089829.817, "dur": 46.692, + "args": { + "External id": 2941125,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016089880.860, "dur": 13.816, + "args": { + "External id": 2941126,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089899.803, "dur": 30.482, + "args": { + "External id": 2941127,"Record function id": 0, "Sequence number": 29294765, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016089900.861, "dur": 0.839, + "args": { + "External id": 2941128,"Sequence number": 29294765, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 1336755, "tid": 1381158, "ts": 1555016089900.861, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016089903.700, "dur": 21.160, + "args": { + "External id": 2941129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016089904.911, "dur": 19.415, + "args": { + "External id": 2941130,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016089908.639, "dur": 0.460, + "args": { + "External id": 2941131,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016089934.730, "dur": 3136.509, + "args": { + "External id": 2941132,"Record function id": 0, "Sequence number": 29294764, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016089947.164, "dur": 3089.685, + "args": { + "External id": 2941133,"Sequence number": 29294764, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 1336755, "tid": 1381158, "ts": 1555016089947.164, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016089975.838, "dur": 1.849, + "args": { + "External id": 2941134,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016089979.268, "dur": 2950.556, + "args": { + "External id": 2941135,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016089980.251, "dur": 2949.302, + "args": { + "External id": 2941136,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016090025.039, "dur": 4.354, + "args": { + "External id": 2941137,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016090030.575, "dur": 2898.031, + "args": { + "External id": 2941138,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336755, "tid": 1381158, + "ts": 1555016092932.535, "dur": 0.298, + "args": { + "External id": 2941139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016092933.968, "dur": 2.245, + "args": { + "External id": 2941140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336755, "tid": 1381158, + "ts": 1555016092935.097, "dur": 0.974, + "args": { + "External id": 2941141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1381158, + "ts": 1555016092939.735, "dur": 23.915, + "args": { + "External id": 2941142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336755, "tid": 1381158, + "ts": 1555016092967.671, "dur": 61.642, + "args": { + "External id": 2941143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1381158, + "ts": 1555016092968.756, "dur": 60.369, + "args": { + "External id": 2941144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1381158, + "ts": 1555016092969.717, "dur": 58.798, + "args": { + "External id": 2941145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093050.107, "dur": 16.136, + "args": { + "External id": 2941146,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016093085.891, "dur": 14.238, + "args": { + "External id": 2941147,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016093088.804, "dur": 9.572, + "args": { + "External id": 2941148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016093092.851, "dur": 4.599, + "args": { + "External id": 2941149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016093093.576, "dur": 3.772, + "args": { + "External id": 2941150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093103.517, "dur": 12.212, + "args": { + "External id": 2941151,"Record function id": 0, "Sequence number": 29294763, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093104.810, "dur": 8.165, + "args": { + "External id": 2941152,"Sequence number": 29294763, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 1336755, "tid": 1381158, "ts": 1555016093104.810, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016093107.073, "dur": 5.714, + "args": { + "External id": 2941153,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016093110.406, "dur": 2.288, + "args": { + "External id": 2941154,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093119.022, "dur": 85.682, + "args": { + "External id": 2941155,"Record function id": 0, "Sequence number": 29294762, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093119.962, "dur": 78.233, + "args": { + "External id": 2941156,"Sequence number": 29294762, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 1336755, "tid": 1381158, "ts": 1555016093119.962, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016093122.104, "dur": 75.710, + "args": { + "External id": 2941157,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016093127.160, "dur": 34.334, + "args": { + "External id": 2941158,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016093128.438, "dur": 2.897, + "args": { + "External id": 2941159,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093132.435, "dur": 28.758, + "args": { + "External id": 2941160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093133.661, "dur": 26.719, + "args": { + "External id": 2941161,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016093163.372, "dur": 5.071, + "args": { + "External id": 2941162,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093167.300, "dur": 0.873, + "args": { + "External id": 2941163,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093169.319, "dur": 27.576, + "args": { + "External id": 2941164,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093212.794, "dur": 63.480, + "args": { + "External id": 2941165,"Record function id": 0, "Sequence number": 29294761, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093213.979, "dur": 58.454, + "args": { + "External id": 2941166,"Sequence number": 29294761, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 1336755, "tid": 1381158, "ts": 1555016093213.979, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016093216.477, "dur": 55.694, + "args": { + "External id": 2941167,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016093218.046, "dur": 25.705, + "args": { + "External id": 2941168,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016093226.106, "dur": 3.034, + "args": { + "External id": 2941169,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093229.938, "dur": 13.583, + "args": { + "External id": 2941170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093230.879, "dur": 12.287, + "args": { + "External id": 2941171,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016093244.674, "dur": 4.334, + "args": { + "External id": 2941172,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093247.703, "dur": 0.929, + "args": { + "External id": 2941173,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093249.596, "dur": 22.097, + "args": { + "External id": 2941174,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093279.968, "dur": 97.260, + "args": { + "External id": 2941175,"Record function id": 0, "Sequence number": 29294760, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093281.270, "dur": 92.735, + "args": { + "External id": 2941176,"Sequence number": 29294760, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 1336755, "tid": 1381158, "ts": 1555016093281.270, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016093283.217, "dur": 90.490, + "args": { + "External id": 2941177,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016093287.079, "dur": 19.114, + "args": { + "External id": 2941178,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016093287.877, "dur": 1.747, + "args": { + "External id": 2941179,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093292.870, "dur": 13.078, + "args": { + "External id": 2941180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093293.669, "dur": 11.958, + "args": { + "External id": 2941181,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016093307.278, "dur": 4.768, + "args": { + "External id": 2941182,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093308.401, "dur": 3.473, + "args": { + "External id": 2941183,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093312.738, "dur": 60.189, + "args": { + "External id": 2941184,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093380.877, "dur": 103.090, + "args": { + "External id": 2941185,"Record function id": 0, "Sequence number": 29294759, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093381.751, "dur": 82.649, + "args": { + "External id": 2941186,"Sequence number": 29294759, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 1336755, "tid": 1381158, "ts": 1555016093381.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016093383.454, "dur": 80.625, + "args": { + "External id": 2941187,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1381158, + "ts": 1555016093384.420, "dur": 30.000, + "args": { + "External id": 2941188,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016093389.480, "dur": 1.894, + "args": { + "External id": 2941189,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093396.222, "dur": 17.930, + "args": { + "External id": 2941190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093401.395, "dur": 12.443, + "args": { + "External id": 2941191,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016093415.092, "dur": 1.999, + "args": { + "External id": 2941192,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093416.162, "dur": 0.710, + "args": { + "External id": 2941193,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093417.716, "dur": 45.711, + "args": { + "External id": 2941194,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016093468.334, "dur": 13.591, + "args": { + "External id": 2941195,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016093489.638, "dur": 441.040, + "args": { + "External id": 2941196,"Record function id": 0, "Sequence number": 29294758, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016093491.314, "dur": 430.545, + "args": { + "External id": 2941197,"Sequence number": 29294758, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 1336755, "tid": 1381158, "ts": 1555016093491.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093719.051, "dur": 53.190, + "args": { + "External id": 2941198,"kernel_hash": "c2gvqmosewyhijah2muqqmydqhvtlhdmifu4z6bgh7dyg6cqzjsd", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "262144", "2048", "1", "1986", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2g/c2gvqmosewyhijah2muqqmydqhvtlhdmifu4z6bgh7dyg6cqzjsd.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[262144, 2048], [2048], [262144, 2048], [262144, 2048], [132, 2048], [262144], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093818.010, "dur": 30.523, + "args": { + "External id": 2941199,"kernel_hash": "cuwfdwbs7qkmol6uuogh3ly4vqbjj6y23zwalr5ud6c7rpfwqwjj", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/uw/cuwfdwbs7qkmol6uuogh3ly4vqbjj6y23zwalr5ud6c7rpfwqwjj.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016093867.644, "dur": 23.087, + "args": { + "External id": 2941200,"kernel_hash": "cgfvfjbxj4xfvr5s2uycho2evpqgtxpczdpzf42nojddveorwrmt", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/gf/cgfvfjbxj4xfvr5s2uycho2evpqgtxpczdpzf42nojddveorwrmt.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016093939.027, "dur": 10.466, + "args": { + "External id": 2941201,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016093941.107, "dur": 7.605, + "args": { + "External id": 2941202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016093943.829, "dur": 4.014, + "args": { + "External id": 2941203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016093944.925, "dur": 2.797, + "args": { + "External id": 2941204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093955.887, "dur": 76.974, + "args": { + "External id": 2941205,"Record function id": 0, "Sequence number": 29294757, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016093957.112, "dur": 61.790, + "args": { + "External id": 2941206,"Sequence number": 29294757, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 1336755, "tid": 1381158, "ts": 1555016093957.112, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016093959.479, "dur": 6.846, + "args": { + "External id": 2941207,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093963.864, "dur": 0.818, + "args": { + "External id": 2941208,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016093967.149, "dur": 7.838, + "args": { + "External id": 2941209,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093973.523, "dur": 0.695, + "args": { + "External id": 2941210,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016093975.505, "dur": 2.745, + "args": { + "External id": 2941211,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016093976.652, "dur": 0.689, + "args": { + "External id": 2941212,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1381158, + "ts": 1555016093978.918, "dur": 39.173, + "args": { + "External id": 2941213,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094015.889, "dur": 0.841, + "args": { + "External id": 2941214,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094038.442, "dur": 9.346, + "args": { + "External id": 2941215,"Record function id": 0, "Sequence number": 29294756, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094039.690, "dur": 1.614, + "args": { + "External id": 2941216,"Sequence number": 29294756, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 1336755, "tid": 1381158, "ts": 1555016094039.690, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016094052.424, "dur": 648.543, + "args": { + "External id": 2941217,"Record function id": 0, "Sequence number": 29294755, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016094053.587, "dur": 629.355, + "args": { + "External id": 2941218,"Sequence number": 29294755, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 1336755, "tid": 1381158, "ts": 1555016094053.587, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094110.561, "dur": 12.837, + "args": { + "External id": 2941219,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016094118.370, "dur": 4.685, + "args": { + "External id": 2941220,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094130.769, "dur": 7.434, + "args": { + "External id": 2941221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094134.827, "dur": 2.502, + "args": { + "External id": 2941222,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094136.333, "dur": 0.764, + "args": { + "External id": 2941223,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1381158, + "ts": 1555016094160.095, "dur": 169.796, + "args": { + "External id": 2941224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094161.496, "dur": 9.196, + "args": { + "External id": 2941225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094162.486, "dur": 7.164, + "args": { + "External id": 2941226,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094166.194, "dur": 3.160, + "args": { + "External id": 2941227,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1381158, + "ts": 1555016094172.847, "dur": 156.224, + "args": { + "External id": 2941228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016094176.033, "dur": 152.085, + "args": { + "External id": 2941229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016094337.659, "dur": 6.742, + "args": { + "External id": 2941230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094341.695, "dur": 2.581, + "args": { + "External id": 2941231,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016094391.838, "dur": 4.924, + "args": { + "External id": 2941232,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016094397.939, "dur": 5.111, + "args": { + "External id": 2941233,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016094406.099, "dur": 2.235, + "args": { + "External id": 2941234,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094453.420, "dur": 2.050, + "args": { + "External id": 2941235,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094453.924, "dur": 1.393, + "args": { + "External id": 2941236,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336755, "tid": 1381158, + "ts": 1555016094490.780, "dur": 168.647, + "args": { + "External id": 2941237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016094499.330, "dur": 11.026, + "args": { + "External id": 2941238,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094505.748, "dur": 1.115, + "args": { + "External id": 2941239,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016094512.520, "dur": 10.494, + "args": { + "External id": 2941240,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094518.757, "dur": 3.396, + "args": { + "External id": 2941241,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016094525.046, "dur": 3.879, + "args": { + "External id": 2941242,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094528.230, "dur": 0.357, + "args": { + "External id": 2941243,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016094529.412, "dur": 4.647, + "args": { + "External id": 2941244,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094532.812, "dur": 0.740, + "args": { + "External id": 2941245,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016094539.115, "dur": 4.123, + "args": { + "External id": 2941246,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094542.373, "dur": 0.555, + "args": { + "External id": 2941247,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094546.196, "dur": 8.182, + "args": { + "External id": 2941248,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016094552.143, "dur": 2.057, + "args": { + "External id": 2941249,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016094554.989, "dur": 4.050, + "args": { + "External id": 2941250,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094557.945, "dur": 0.806, + "args": { + "External id": 2941251,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094559.971, "dur": 2.513, + "args": { + "External id": 2941252,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094561.024, "dur": 1.376, + "args": { + "External id": 2941253,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016094564.605, "dur": 79.583, + "args": { + "External id": 2941254,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094645.930, "dur": 3.873, + "args": { + "External id": 2941255,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016094652.618, "dur": 2.030, + "args": { + "External id": 2941256,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094653.678, "dur": 0.455, + "args": { + "External id": 2941257,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094656.977, "dur": 1.039, + "args": { + "External id": 2941258,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016094713.401, "dur": 9.990, + "args": { + "External id": 2941259,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016094715.556, "dur": 6.815, + "args": { + "External id": 2941260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016094717.930, "dur": 3.523, + "args": { + "External id": 2941261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016094718.967, "dur": 2.384, + "args": { + "External id": 2941262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094727.446, "dur": 11.975, + "args": { + "External id": 2941263,"Record function id": 0, "Sequence number": 29294754, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094728.898, "dur": 6.446, + "args": { + "External id": 2941264,"Sequence number": 29294754, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 1336755, "tid": 1381158, "ts": 1555016094728.898, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094733.161, "dur": 1.929, + "args": { + "External id": 2941265,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094734.177, "dur": 0.759, + "args": { + "External id": 2941266,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094743.933, "dur": 164.802, + "args": { + "External id": 2941267,"Record function id": 0, "Sequence number": 29294753, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094744.879, "dur": 155.550, + "args": { + "External id": 2941268,"Sequence number": 29294753, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 1336755, "tid": 1381158, "ts": 1555016094744.879, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094753.192, "dur": 4.567, + "args": { + "External id": 2941269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094754.778, "dur": 2.388, + "args": { + "External id": 2941270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094756.240, "dur": 0.742, + "args": { + "External id": 2941271,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016094759.040, "dur": 62.946, + "args": { + "External id": 2941272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094823.203, "dur": 5.661, + "args": { + "External id": 2941273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094824.018, "dur": 4.233, + "args": { + "External id": 2941274,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094827.244, "dur": 0.806, + "args": { + "External id": 2941275,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094830.722, "dur": 6.338, + "args": { + "External id": 2941276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094831.820, "dur": 4.777, + "args": { + "External id": 2941277,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094835.920, "dur": 0.587, + "args": { + "External id": 2941278,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016094837.726, "dur": 61.673, + "args": { + "External id": 2941279,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094913.606, "dur": 6.857, + "args": { + "External id": 2941280,"Record function id": 0, "Sequence number": 29294752, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094914.750, "dur": 4.103, + "args": { + "External id": 2941281,"Sequence number": 29294752, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 1336755, "tid": 1381158, "ts": 1555016094914.750, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094916.872, "dur": 1.847, + "args": { + "External id": 2941282,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094917.638, "dur": 0.954, + "args": { + "External id": 2941283,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094924.150, "dur": 12.828, + "args": { + "External id": 2941284,"Record function id": 0, "Sequence number": 29294751, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094927.193, "dur": 6.421, + "args": { + "External id": 2941285,"Sequence number": 29294751, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 1336755, "tid": 1381158, "ts": 1555016094927.193, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094928.279, "dur": 5.134, + "args": { + "External id": 2941286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094928.832, "dur": 4.135, + "args": { + "External id": 2941287,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094932.393, "dur": 0.491, + "args": { + "External id": 2941288,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016094941.006, "dur": 5.628, + "args": { + "External id": 2941289,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016094942.497, "dur": 3.631, + "args": { + "External id": 2941290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016094943.849, "dur": 1.975, + "args": { + "External id": 2941291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016094944.495, "dur": 1.218, + "args": { + "External id": 2941292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094949.974, "dur": 10.595, + "args": { + "External id": 2941293,"Record function id": 0, "Sequence number": 29294750, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094951.368, "dur": 5.300, + "args": { + "External id": 2941294,"Sequence number": 29294750, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 1336755, "tid": 1381158, "ts": 1555016094951.368, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016094952.833, "dur": 3.698, + "args": { + "External id": 2941295,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016094955.595, "dur": 0.831, + "args": { + "External id": 2941296,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094963.618, "dur": 171.093, + "args": { + "External id": 2941297,"Record function id": 0, "Sequence number": 29294749, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016094964.692, "dur": 156.119, + "args": { + "External id": 2941298,"Sequence number": 29294749, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 1336755, "tid": 1381158, "ts": 1555016094964.692, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016094967.338, "dur": 2.670, + "args": { + "External id": 2941299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016094968.015, "dur": 1.576, + "args": { + "External id": 2941300,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016094969.008, "dur": 0.488, + "args": { + "External id": 2941301,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016094973.690, "dur": 75.871, + "args": { + "External id": 2941302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095051.633, "dur": 7.509, + "args": { + "External id": 2941303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095052.580, "dur": 5.815, + "args": { + "External id": 2941304,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095056.835, "dur": 1.426, + "args": { + "External id": 2941305,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095060.780, "dur": 7.402, + "args": { + "External id": 2941306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095061.546, "dur": 6.093, + "args": { + "External id": 2941307,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095064.425, "dur": 3.101, + "args": { + "External id": 2941308,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016095068.764, "dur": 51.313, + "args": { + "External id": 2941309,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095156.147, "dur": 44.530, + "args": { + "External id": 2941310,"Record function id": 0, "Sequence number": 29294748, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095158.323, "dur": 6.773, + "args": { + "External id": 2941311,"Sequence number": 29294748, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 1336755, "tid": 1381158, "ts": 1555016095158.323, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095159.891, "dur": 5.040, + "args": { + "External id": 2941312,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095162.773, "dur": 1.889, + "args": { + "External id": 2941313,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016095169.630, "dur": 27.878, + "args": { + "External id": 2941314,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095206.352, "dur": 11.761, + "args": { + "External id": 2941315,"Record function id": 0, "Sequence number": 29294747, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095207.351, "dur": 7.721, + "args": { + "External id": 2941316,"Sequence number": 29294747, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 1336755, "tid": 1381158, "ts": 1555016095207.351, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095208.622, "dur": 6.226, + "args": { + "External id": 2941317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095212.165, "dur": 2.165, + "args": { + "External id": 2941318,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095213.401, "dur": 0.755, + "args": { + "External id": 2941319,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016095222.222, "dur": 6.832, + "args": { + "External id": 2941320,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016095223.735, "dur": 4.807, + "args": { + "External id": 2941321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016095225.222, "dur": 2.536, + "args": { + "External id": 2941322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016095226.035, "dur": 1.624, + "args": { + "External id": 2941323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016095233.560, "dur": 519.181, + "args": { + "External id": 2941324,"Record function id": 0, "Sequence number": 29294746, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016095235.146, "dur": 495.764, + "args": { + "External id": 2941325,"Sequence number": 29294746, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 1336755, "tid": 1381158, "ts": 1555016095235.146, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1381158, + "ts": 1555016095266.772, "dur": 37.411, + "args": { + "External id": 2941326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016095269.084, "dur": 34.910, + "args": { + "External id": 2941327,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016095272.375, "dur": 5.757, + "args": { + "External id": 2941328,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016095274.904, "dur": 2.748, + "args": { + "External id": 2941329,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016095279.457, "dur": 23.818, + "args": { + "External id": 2941330,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095317.647, "dur": 4.027, + "args": { + "External id": 2941331,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095320.188, "dur": 1.355, + "args": { + "External id": 2941332,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095325.245, "dur": 4.431, + "args": { + "External id": 2941333,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095326.052, "dur": 3.513, + "args": { + "External id": 2941334,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016095347.772, "dur": 2.016, + "args": { + "External id": 2941335,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016095367.196, "dur": 4.446, + "args": { + "External id": 2941336,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095609.872, "dur": 2.217, + "args": { + "External id": 2941337,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016095618.436, "dur": 37.409, + "args": { + "External id": 2941338,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095630.291, "dur": 0.847, + "args": { + "External id": 2941339,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016095661.743, "dur": 31.908, + "args": { + "External id": 2941340,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016095663.765, "dur": 29.679, + "args": { + "External id": 2941341,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095667.736, "dur": 4.916, + "args": { + "External id": 2941342,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016095673.945, "dur": 19.019, + "args": { + "External id": 2941343,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016095700.186, "dur": 2.521, + "args": { + "External id": 2941344,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095701.250, "dur": 1.351, + "args": { + "External id": 2941345,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095710.036, "dur": 5.017, + "args": { + "External id": 2941346,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095713.417, "dur": 1.523, + "args": { + "External id": 2941347,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095717.251, "dur": 4.511, + "args": { + "External id": 2941348,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095717.942, "dur": 3.720, + "args": { + "External id": 2941349,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016095762.022, "dur": 7.808, + "args": { + "External id": 2941350,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016095763.835, "dur": 5.367, + "args": { + "External id": 2941351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016095765.688, "dur": 2.760, + "args": { + "External id": 2941352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016095766.618, "dur": 1.691, + "args": { + "External id": 2941353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095773.437, "dur": 10.704, + "args": { + "External id": 2941354,"Record function id": 0, "Sequence number": 29294745, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095774.352, "dur": 5.791, + "args": { + "External id": 2941355,"Sequence number": 29294745, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 1336755, "tid": 1381158, "ts": 1555016095774.352, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095778.171, "dur": 1.805, + "args": { + "External id": 2941356,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095778.698, "dur": 1.110, + "args": { + "External id": 2941357,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095787.565, "dur": 148.376, + "args": { + "External id": 2941358,"Record function id": 0, "Sequence number": 29294744, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095791.346, "dur": 137.108, + "args": { + "External id": 2941359,"Sequence number": 29294744, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 1336755, "tid": 1381158, "ts": 1555016095791.346, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095795.437, "dur": 4.343, + "args": { + "External id": 2941360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095796.781, "dur": 2.457, + "args": { + "External id": 2941361,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095798.095, "dur": 0.959, + "args": { + "External id": 2941362,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016095801.153, "dur": 61.728, + "args": { + "External id": 2941363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095864.159, "dur": 8.364, + "args": { + "External id": 2941364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095864.830, "dur": 7.030, + "args": { + "External id": 2941365,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095868.532, "dur": 3.141, + "args": { + "External id": 2941366,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095874.342, "dur": 5.684, + "args": { + "External id": 2941367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095875.386, "dur": 4.241, + "args": { + "External id": 2941368,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095878.906, "dur": 0.588, + "args": { + "External id": 2941369,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016095880.666, "dur": 46.840, + "args": { + "External id": 2941370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095940.441, "dur": 12.534, + "args": { + "External id": 2941371,"Record function id": 0, "Sequence number": 29294743, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095941.335, "dur": 8.205, + "args": { + "External id": 2941372,"Sequence number": 29294743, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 1336755, "tid": 1381158, "ts": 1555016095941.335, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016095942.698, "dur": 6.671, + "args": { + "External id": 2941373,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016095946.010, "dur": 3.207, + "args": { + "External id": 2941374,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095956.325, "dur": 11.382, + "args": { + "External id": 2941375,"Record function id": 0, "Sequence number": 29294742, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095957.285, "dur": 6.531, + "args": { + "External id": 2941376,"Sequence number": 29294742, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 1336755, "tid": 1381158, "ts": 1555016095957.285, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016095958.350, "dur": 5.247, + "args": { + "External id": 2941377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016095959.252, "dur": 3.825, + "args": { + "External id": 2941378,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016095962.458, "dur": 0.478, + "args": { + "External id": 2941379,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016095971.395, "dur": 5.240, + "args": { + "External id": 2941380,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016095972.830, "dur": 3.337, + "args": { + "External id": 2941381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016095973.978, "dur": 1.956, + "args": { + "External id": 2941382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016095974.656, "dur": 1.182, + "args": { + "External id": 2941383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095979.673, "dur": 51.850, + "args": { + "External id": 2941384,"Record function id": 0, "Sequence number": 29294741, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016095983.052, "dur": 44.772, + "args": { + "External id": 2941385,"Sequence number": 29294741, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 1336755, "tid": 1381158, "ts": 1555016095983.052, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016096024.040, "dur": 3.606, + "args": { + "External id": 2941386,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016096025.002, "dur": 2.354, + "args": { + "External id": 2941387,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096038.779, "dur": 411.183, + "args": { + "External id": 2941388,"Record function id": 0, "Sequence number": 29294740, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096040.472, "dur": 381.809, + "args": { + "External id": 2941389,"Sequence number": 29294740, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 1336755, "tid": 1381158, "ts": 1555016096040.472, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016096063.396, "dur": 11.580, + "args": { + "External id": 2941390,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096070.385, "dur": 4.131, + "args": { + "External id": 2941391,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016096077.068, "dur": 3.102, + "args": { + "External id": 2941392,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096078.004, "dur": 1.976, + "args": { + "External id": 2941393,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016096081.931, "dur": 8.482, + "args": { + "External id": 2941394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096085.576, "dur": 4.658, + "args": { + "External id": 2941395,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096125.588, "dur": 268.154, + "args": { + "External id": 2941396,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016096272.028, "dur": 4.625, + "args": { + "External id": 2941397,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016096281.064, "dur": 4.225, + "args": { + "External id": 2941398,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016096408.666, "dur": 4.415, + "args": { + "External id": 2941399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016096416.214, "dur": 0.559, + "args": { + "External id": 2941400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016096418.614, "dur": 0.780, + "args": { + "External id": 2941401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096459.179, "dur": 255.600, + "args": { + "External id": 2941402,"Record function id": 0, "Sequence number": 29294739, "Fwd thread id": 1, "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096460.818, "dur": 246.071, + "args": { + "External id": 2941403,"Sequence number": 29294739, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 474 + } + }, + { + "ph": "f", "id": 49, "pid": 1336755, "tid": 1381158, "ts": 1555016096460.818, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016096485.518, "dur": 51.635, + "args": { + "External id": 2941404,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096492.287, "dur": 2.990, + "args": { + "External id": 2941405,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016096496.671, "dur": 39.838, + "args": { + "External id": 2941406,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016096547.961, "dur": 3.835, + "args": { + "External id": 2941407,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096549.361, "dur": 2.084, + "args": { + "External id": 2941408,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096721.987, "dur": 154.959, + "args": { + "External id": 2941409,"Record function id": 0, "Sequence number": 29294738, "Fwd thread id": 1, "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016096723.451, "dur": 146.063, + "args": { + "External id": 2941410,"Sequence number": 29294738, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 481 + } + }, + { + "ph": "f", "id": 50, "pid": 1336755, "tid": 1381158, "ts": 1555016096723.451, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016096735.968, "dur": 35.285, + "args": { + "External id": 2941411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096739.959, "dur": 2.355, + "args": { + "External id": 2941412,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016096743.389, "dur": 27.338, + "args": { + "External id": 2941413,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016096777.600, "dur": 4.503, + "args": { + "External id": 2941414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096779.099, "dur": 2.709, + "args": { + "External id": 2941415,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096883.631, "dur": 16.468, + "args": { + "External id": 2941416,"Record function id": 0, "Sequence number": 29294737, "Fwd thread id": 1, "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096887.369, "dur": 9.433, + "args": { + "External id": 2941417,"Sequence number": 29294737, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 488 + } + }, + { + "ph": "f", "id": 51, "pid": 1336755, "tid": 1381158, "ts": 1555016096887.369, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016096889.760, "dur": 6.798, + "args": { + "External id": 2941418,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016096891.155, "dur": 5.228, + "args": { + "External id": 2941419,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096903.825, "dur": 10.713, + "args": { + "External id": 2941420,"Record function id": 0, "Sequence number": 29294736, "Fwd thread id": 1, "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096904.966, "dur": 6.072, + "args": { + "External id": 2941421,"Sequence number": 29294736, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 492 + } + }, + { + "ph": "f", "id": 52, "pid": 1336755, "tid": 1381158, "ts": 1555016096904.966, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016096908.855, "dur": 2.040, + "args": { + "External id": 2941422,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016096909.555, "dur": 1.128, + "args": { + "External id": 2941423,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096917.679, "dur": 8.402, + "args": { + "External id": 2941424,"Record function id": 0, "Sequence number": 29294735, "Fwd thread id": 1, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096918.379, "dur": 5.599, + "args": { + "External id": 2941425,"Sequence number": 29294735, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 496 + } + }, + { + "ph": "f", "id": 53, "pid": 1336755, "tid": 1381158, "ts": 1555016096918.379, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016096919.834, "dur": 3.994, + "args": { + "External id": 2941426,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016096922.926, "dur": 0.770, + "args": { + "External id": 2941427,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096931.437, "dur": 9.053, + "args": { + "External id": 2941428,"Record function id": 0, "Sequence number": 29294734, "Fwd thread id": 1, "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096932.469, "dur": 3.625, + "args": { + "External id": 2941429,"Sequence number": 29294734, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 500 + } + }, + { + "ph": "f", "id": 54, "pid": 1336755, "tid": 1381158, "ts": 1555016096932.469, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016096933.810, "dur": 2.143, + "args": { + "External id": 2941430,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016096934.559, "dur": 1.262, + "args": { + "External id": 2941431,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096943.669, "dur": 251.095, + "args": { + "External id": 2941432,"Record function id": 0, "Sequence number": 29294733, "Fwd thread id": 1, "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016096944.575, "dur": 238.915, + "args": { + "External id": 2941433,"Sequence number": 29294733, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 504 + } + }, + { + "ph": "f", "id": 55, "pid": 1336755, "tid": 1381158, "ts": 1555016096944.575, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016096948.642, "dur": 8.708, + "args": { + "External id": 2941434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016096950.583, "dur": 6.153, + "args": { + "External id": 2941435,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016096954.842, "dur": 1.607, + "args": { + "External id": 2941436,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016096958.986, "dur": 107.596, + "args": { + "External id": 2941437,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097069.289, "dur": 7.903, + "args": { + "External id": 2941438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097070.274, "dur": 6.063, + "args": { + "External id": 2941439,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097074.801, "dur": 1.329, + "args": { + "External id": 2941440,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097105.719, "dur": 5.817, + "args": { + "External id": 2941441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097107.329, "dur": 3.658, + "args": { + "External id": 2941442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097110.407, "dur": 0.501, + "args": { + "External id": 2941443,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016097112.423, "dur": 69.419, + "args": { + "External id": 2941444,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097203.528, "dur": 8.785, + "args": { + "External id": 2941445,"Record function id": 0, "Sequence number": 29294732, "Fwd thread id": 1, "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097204.804, "dur": 5.307, + "args": { + "External id": 2941446,"Sequence number": 29294732, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 517 + } + }, + { + "ph": "f", "id": 56, "pid": 1336755, "tid": 1381158, "ts": 1555016097204.804, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097207.254, "dur": 2.683, + "args": { + "External id": 2941447,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097207.904, "dur": 1.889, + "args": { + "External id": 2941448,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097215.813, "dur": 12.954, + "args": { + "External id": 2941449,"Record function id": 0, "Sequence number": 29294731, "Fwd thread id": 1, "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097216.854, "dur": 9.568, + "args": { + "External id": 2941450,"Sequence number": 29294731, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 521 + } + }, + { + "ph": "f", "id": 57, "pid": 1336755, "tid": 1381158, "ts": 1555016097216.854, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097220.888, "dur": 5.318, + "args": { + "External id": 2941451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097221.824, "dur": 3.883, + "args": { + "External id": 2941452,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097224.937, "dur": 0.652, + "args": { + "External id": 2941453,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097234.791, "dur": 8.773, + "args": { + "External id": 2941454,"Record function id": 0, "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097236.271, "dur": 6.544, + "args": { + "External id": 2941455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097238.792, "dur": 3.540, + "args": { + "External id": 2941456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097239.678, "dur": 2.498, + "args": { + "External id": 2941457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097250.012, "dur": 9.167, + "args": { + "External id": 2941458,"Record function id": 0, "Sequence number": 29294730, "Fwd thread id": 1, "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097250.882, "dur": 4.856, + "args": { + "External id": 2941459,"Sequence number": 29294730, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 530 + } + }, + { + "ph": "f", "id": 58, "pid": 1336755, "tid": 1381158, "ts": 1555016097250.882, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097252.120, "dur": 3.475, + "args": { + "External id": 2941460,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097254.470, "dur": 0.924, + "args": { + "External id": 2941461,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097262.517, "dur": 103.211, + "args": { + "External id": 2941462,"Record function id": 0, "Sequence number": 29294729, "Fwd thread id": 1, "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097263.320, "dur": 92.992, + "args": { + "External id": 2941463,"Sequence number": 29294729, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 534 + } + }, + { + "ph": "f", "id": 59, "pid": 1336755, "tid": 1381158, "ts": 1555016097263.320, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097267.544, "dur": 4.231, + "args": { + "External id": 2941464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097268.111, "dur": 3.251, + "args": { + "External id": 2941465,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097270.837, "dur": 0.402, + "args": { + "External id": 2941466,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016097272.522, "dur": 34.464, + "args": { + "External id": 2941467,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097310.415, "dur": 2.573, + "args": { + "External id": 2941468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097311.181, "dur": 1.303, + "args": { + "External id": 2941469,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097311.907, "dur": 0.464, + "args": { + "External id": 2941470,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097314.253, "dur": 4.877, + "args": { + "External id": 2941471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097315.152, "dur": 3.549, + "args": { + "External id": 2941472,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097315.695, "dur": 2.872, + "args": { + "External id": 2941473,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016097322.112, "dur": 33.603, + "args": { + "External id": 2941474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097369.996, "dur": 35.313, + "args": { + "External id": 2941475,"Record function id": 0, "Sequence number": 29294728, "Fwd thread id": 1, "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097371.034, "dur": 5.170, + "args": { + "External id": 2941476,"Sequence number": 29294728, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 547 + } + }, + { + "ph": "f", "id": 60, "pid": 1336755, "tid": 1381158, "ts": 1555016097371.034, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097374.204, "dur": 1.864, + "args": { + "External id": 2941477,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097374.895, "dur": 1.044, + "args": { + "External id": 2941478,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016097379.160, "dur": 23.681, + "args": { + "External id": 2941479,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097408.938, "dur": 10.184, + "args": { + "External id": 2941480,"Record function id": 0, "Sequence number": 29294727, "Fwd thread id": 1, "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097409.955, "dur": 6.740, + "args": { + "External id": 2941481,"Sequence number": 29294727, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 552 + } + }, + { + "ph": "f", "id": 61, "pid": 1336755, "tid": 1381158, "ts": 1555016097409.955, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097411.081, "dur": 5.404, + "args": { + "External id": 2941482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097411.925, "dur": 3.982, + "args": { + "External id": 2941483,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097415.142, "dur": 0.616, + "args": { + "External id": 2941484,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097423.091, "dur": 7.849, + "args": { + "External id": 2941485,"Record function id": 0, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097427.121, "dur": 3.262, + "args": { + "External id": 2941486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097428.010, "dur": 1.979, + "args": { + "External id": 2941487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097428.667, "dur": 1.199, + "args": { + "External id": 2941488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097434.301, "dur": 6.406, + "args": { + "External id": 2941489,"Record function id": 0, "Sequence number": 29294726, "Fwd thread id": 1, "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097435.426, "dur": 2.809, + "args": { + "External id": 2941490,"Sequence number": 29294726, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 561 + } + }, + { + "ph": "f", "id": 62, "pid": 1336755, "tid": 1381158, "ts": 1555016097435.426, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097436.547, "dur": 1.533, + "args": { + "External id": 2941491,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097437.115, "dur": 0.827, + "args": { + "External id": 2941492,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097444.222, "dur": 96.386, + "args": { + "External id": 2941493,"Record function id": 0, "Sequence number": 29294725, "Fwd thread id": 1, "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097445.018, "dur": 85.297, + "args": { + "External id": 2941494,"Sequence number": 29294725, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 565 + } + }, + { + "ph": "f", "id": 63, "pid": 1336755, "tid": 1381158, "ts": 1555016097445.018, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097451.471, "dur": 2.193, + "args": { + "External id": 2941495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097452.075, "dur": 1.159, + "args": { + "External id": 2941496,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097452.672, "dur": 0.443, + "args": { + "External id": 2941497,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016097454.254, "dur": 29.730, + "args": { + "External id": 2941498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097484.998, "dur": 7.776, + "args": { + "External id": 2941499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097485.578, "dur": 6.656, + "args": { + "External id": 2941500,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097491.531, "dur": 0.571, + "args": { + "External id": 2941501,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097494.275, "dur": 2.494, + "args": { + "External id": 2941502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097495.324, "dur": 0.956, + "args": { + "External id": 2941503,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097495.847, "dur": 0.353, + "args": { + "External id": 2941504,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016097497.257, "dur": 32.382, + "args": { + "External id": 2941505,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097544.973, "dur": 28.493, + "args": { + "External id": 2941506,"Record function id": 0, "Sequence number": 29294724, "Fwd thread id": 1, "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097546.079, "dur": 5.774, + "args": { + "External id": 2941507,"Sequence number": 29294724, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 578 + } + }, + { + "ph": "f", "id": 64, "pid": 1336755, "tid": 1381158, "ts": 1555016097546.079, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097547.841, "dur": 3.847, + "args": { + "External id": 2941508,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097550.637, "dur": 0.910, + "args": { + "External id": 2941509,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016097554.108, "dur": 17.076, + "args": { + "External id": 2941510,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097577.172, "dur": 11.858, + "args": { + "External id": 2941511,"Record function id": 0, "Sequence number": 29294723, "Fwd thread id": 1, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016097577.966, "dur": 8.138, + "args": { + "External id": 2941512,"Sequence number": 29294723, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 583 + } + }, + { + "ph": "f", "id": 65, "pid": 1336755, "tid": 1381158, "ts": 1555016097577.966, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016097578.944, "dur": 6.949, + "args": { + "External id": 2941513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016097581.939, "dur": 3.439, + "args": { + "External id": 2941514,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097584.749, "dur": 0.490, + "args": { + "External id": 2941515,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097592.895, "dur": 6.978, + "args": { + "External id": 2941516,"Record function id": 0, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097594.083, "dur": 5.312, + "args": { + "External id": 2941517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097595.063, "dur": 3.850, + "args": { + "External id": 2941518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097597.508, "dur": 1.270, + "args": { + "External id": 2941519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016097603.915, "dur": 367.155, + "args": { + "External id": 2941520,"Record function id": 0, "Sequence number": 29294722, "Fwd thread id": 1, "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016097605.068, "dur": 334.276, + "args": { + "External id": 2941521,"Sequence number": 29294722, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 592 + } + }, + { + "ph": "f", "id": 66, "pid": 1336755, "tid": 1381158, "ts": 1555016097605.068, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097639.929, "dur": 1.559, + "args": { + "External id": 2941522,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097640.426, "dur": 0.881, + "args": { + "External id": 2941523,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016097657.230, "dur": 5.933, + "args": { + "External id": 2941524,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016097672.132, "dur": 2.061, + "args": { + "External id": 2941525,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097834.848, "dur": 1.416, + "args": { + "External id": 2941526,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016097840.194, "dur": 39.406, + "args": { + "External id": 2941527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097854.649, "dur": 0.912, + "args": { + "External id": 2941528,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016097885.134, "dur": 28.433, + "args": { + "External id": 2941529,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016097886.809, "dur": 26.557, + "args": { + "External id": 2941530,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016097890.106, "dur": 3.908, + "args": { + "External id": 2941531,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016097895.554, "dur": 17.240, + "args": { + "External id": 2941532,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016097919.710, "dur": 2.564, + "args": { + "External id": 2941533,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097920.942, "dur": 1.235, + "args": { + "External id": 2941534,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016097928.482, "dur": 3.784, + "args": { + "External id": 2941535,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016097931.203, "dur": 0.977, + "args": { + "External id": 2941536,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016097949.339, "dur": 16.884, + "args": { + "External id": 2941537,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097979.780, "dur": 51.680, + "args": { + "External id": 2941538,"Record function id": 0, "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016097981.245, "dur": 48.781, + "args": { + "External id": 2941539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016097982.807, "dur": 45.506, + "args": { + "External id": 2941540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098025.240, "dur": 2.682, + "args": { + "External id": 2941541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098040.013, "dur": 9.000, + "args": { + "External id": 2941542,"Record function id": 0, "Sequence number": 29294721, "Fwd thread id": 1, "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098041.646, "dur": 1.185, + "args": { + "External id": 2941543,"Sequence number": 29294721, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 614 + } + }, + { + "ph": "f", "id": 67, "pid": 1336755, "tid": 1381158, "ts": 1555016098041.646, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016098053.127, "dur": 447.131, + "args": { + "External id": 2941544,"Record function id": 0, "Sequence number": 29294720, "Fwd thread id": 1, "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016098054.096, "dur": 431.636, + "args": { + "External id": 2941545,"Sequence number": 29294720, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 616 + } + }, + { + "ph": "f", "id": 68, "pid": 1336755, "tid": 1381158, "ts": 1555016098054.096, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098087.235, "dur": 7.944, + "args": { + "External id": 2941546,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016098091.777, "dur": 3.101, + "args": { + "External id": 2941547,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098098.981, "dur": 8.173, + "args": { + "External id": 2941548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098100.121, "dur": 6.380, + "args": { + "External id": 2941549,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098105.359, "dur": 0.951, + "args": { + "External id": 2941550,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1381158, + "ts": 1555016098110.414, "dur": 109.615, + "args": { + "External id": 2941551,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098111.457, "dur": 5.410, + "args": { + "External id": 2941552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098112.114, "dur": 4.101, + "args": { + "External id": 2941553,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098112.855, "dur": 3.251, + "args": { + "External id": 2941554,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1381158, + "ts": 1555016098118.105, "dur": 101.275, + "args": { + "External id": 2941555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016098121.753, "dur": 96.342, + "args": { + "External id": 2941556,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016098225.887, "dur": 3.156, + "args": { + "External id": 2941557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098227.126, "dur": 1.810, + "args": { + "External id": 2941558,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016098263.185, "dur": 6.481, + "args": { + "External id": 2941559,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016098270.625, "dur": 2.191, + "args": { + "External id": 2941560,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016098273.575, "dur": 1.941, + "args": { + "External id": 2941561,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098310.764, "dur": 1.891, + "args": { + "External id": 2941562,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098311.398, "dur": 1.104, + "args": { + "External id": 2941563,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336755, "tid": 1381158, + "ts": 1555016098335.591, "dur": 132.568, + "args": { + "External id": 2941564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016098340.261, "dur": 4.979, + "args": { + "External id": 2941565,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098343.388, "dur": 0.989, + "args": { + "External id": 2941566,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016098346.633, "dur": 10.796, + "args": { + "External id": 2941567,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098353.267, "dur": 3.328, + "args": { + "External id": 2941568,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016098359.046, "dur": 4.072, + "args": { + "External id": 2941569,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098362.363, "dur": 0.441, + "args": { + "External id": 2941570,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016098363.667, "dur": 4.250, + "args": { + "External id": 2941571,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098366.818, "dur": 0.538, + "args": { + "External id": 2941572,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016098373.459, "dur": 2.160, + "args": { + "External id": 2941573,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098374.575, "dur": 0.743, + "args": { + "External id": 2941574,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098378.531, "dur": 5.026, + "args": { + "External id": 2941575,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016098381.191, "dur": 2.157, + "args": { + "External id": 2941576,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016098386.471, "dur": 1.440, + "args": { + "External id": 2941577,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098387.296, "dur": 0.335, + "args": { + "External id": 2941578,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098388.588, "dur": 6.424, + "args": { + "External id": 2941579,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098392.031, "dur": 2.885, + "args": { + "External id": 2941580,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016098396.471, "dur": 55.520, + "args": { + "External id": 2941581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098456.433, "dur": 3.294, + "args": { + "External id": 2941582,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016098460.196, "dur": 3.956, + "args": { + "External id": 2941583,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098463.148, "dur": 0.386, + "args": { + "External id": 2941584,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098466.121, "dur": 0.939, + "args": { + "External id": 2941585,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016098511.458, "dur": 8.379, + "args": { + "External id": 2941586,"Record function id": 0, "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016098513.165, "dur": 6.015, + "args": { + "External id": 2941587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098514.985, "dur": 3.383, + "args": { + "External id": 2941588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098515.803, "dur": 2.450, + "args": { + "External id": 2941589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098523.679, "dur": 9.762, + "args": { + "External id": 2941590,"Record function id": 0, "Sequence number": 29294719, "Fwd thread id": 1, "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098524.737, "dur": 5.892, + "args": { + "External id": 2941591,"Sequence number": 29294719, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 662 + } + }, + { + "ph": "f", "id": 69, "pid": 1336755, "tid": 1381158, "ts": 1555016098524.737, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098526.452, "dur": 4.000, + "args": { + "External id": 2941592,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098529.611, "dur": 0.703, + "args": { + "External id": 2941593,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098536.880, "dur": 135.376, + "args": { + "External id": 2941594,"Record function id": 0, "Sequence number": 29294718, "Fwd thread id": 1, "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098537.728, "dur": 125.233, + "args": { + "External id": 2941595,"Sequence number": 29294718, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 666 + } + }, + { + "ph": "f", "id": 70, "pid": 1336755, "tid": 1381158, "ts": 1555016098537.728, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098541.078, "dur": 5.790, + "args": { + "External id": 2941596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098542.387, "dur": 3.932, + "args": { + "External id": 2941597,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098545.457, "dur": 0.697, + "args": { + "External id": 2941598,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016098548.198, "dur": 42.828, + "args": { + "External id": 2941599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098594.820, "dur": 4.081, + "args": { + "External id": 2941600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098595.916, "dur": 1.951, + "args": { + "External id": 2941601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098596.720, "dur": 0.976, + "args": { + "External id": 2941602,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098600.883, "dur": 3.385, + "args": { + "External id": 2941603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098602.239, "dur": 1.418, + "args": { + "External id": 2941604,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098602.998, "dur": 0.534, + "args": { + "External id": 2941605,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016098606.965, "dur": 55.181, + "args": { + "External id": 2941606,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098676.793, "dur": 8.600, + "args": { + "External id": 2941607,"Record function id": 0, "Sequence number": 29294717, "Fwd thread id": 1, "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098677.816, "dur": 5.786, + "args": { + "External id": 2941608,"Sequence number": 29294717, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 679 + } + }, + { + "ph": "f", "id": 71, "pid": 1336755, "tid": 1381158, "ts": 1555016098677.816, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098681.653, "dur": 1.808, + "args": { + "External id": 2941609,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098682.100, "dur": 1.220, + "args": { + "External id": 2941610,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098688.771, "dur": 8.472, + "args": { + "External id": 2941611,"Record function id": 0, "Sequence number": 29294716, "Fwd thread id": 1, "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098689.788, "dur": 4.132, + "args": { + "External id": 2941612,"Sequence number": 29294716, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 683 + } + }, + { + "ph": "f", "id": 72, "pid": 1336755, "tid": 1381158, "ts": 1555016098689.788, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098690.970, "dur": 2.757, + "args": { + "External id": 2941613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098691.826, "dur": 1.428, + "args": { + "External id": 2941614,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098692.635, "dur": 0.474, + "args": { + "External id": 2941615,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016098700.999, "dur": 7.483, + "args": { + "External id": 2941616,"Record function id": 0, "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016098702.201, "dur": 5.809, + "args": { + "External id": 2941617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098703.272, "dur": 4.513, + "args": { + "External id": 2941618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098706.700, "dur": 1.007, + "args": { + "External id": 2941619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098711.495, "dur": 7.741, + "args": { + "External id": 2941620,"Record function id": 0, "Sequence number": 29294715, "Fwd thread id": 1, "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098714.174, "dur": 2.856, + "args": { + "External id": 2941621,"Sequence number": 29294715, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 692 + } + }, + { + "ph": "f", "id": 73, "pid": 1336755, "tid": 1381158, "ts": 1555016098714.174, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098715.503, "dur": 1.380, + "args": { + "External id": 2941622,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098716.158, "dur": 0.593, + "args": { + "External id": 2941623,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098722.348, "dur": 119.555, + "args": { + "External id": 2941624,"Record function id": 0, "Sequence number": 29294714, "Fwd thread id": 1, "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098723.199, "dur": 109.699, + "args": { + "External id": 2941625,"Sequence number": 29294714, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 696 + } + }, + { + "ph": "f", "id": 74, "pid": 1336755, "tid": 1381158, "ts": 1555016098723.199, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098726.814, "dur": 7.689, + "args": { + "External id": 2941626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098729.905, "dur": 4.127, + "args": { + "External id": 2941627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098733.504, "dur": 0.383, + "args": { + "External id": 2941628,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016098735.204, "dur": 35.546, + "args": { + "External id": 2941629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098771.766, "dur": 5.088, + "args": { + "External id": 2941630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098772.435, "dur": 3.850, + "args": { + "External id": 2941631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098773.235, "dur": 2.896, + "args": { + "External id": 2941632,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098780.096, "dur": 7.335, + "args": { + "External id": 2941633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098781.025, "dur": 5.795, + "args": { + "External id": 2941634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098784.202, "dur": 2.531, + "args": { + "External id": 2941635,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016098787.967, "dur": 44.231, + "args": { + "External id": 2941636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098846.124, "dur": 29.724, + "args": { + "External id": 2941637,"Record function id": 0, "Sequence number": 29294713, "Fwd thread id": 1, "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098847.262, "dur": 3.368, + "args": { + "External id": 2941638,"Sequence number": 29294713, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 709 + } + }, + { + "ph": "f", "id": 75, "pid": 1336755, "tid": 1381158, "ts": 1555016098847.262, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098848.680, "dur": 1.797, + "args": { + "External id": 2941639,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098849.362, "dur": 0.985, + "args": { + "External id": 2941640,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016098853.311, "dur": 19.639, + "args": { + "External id": 2941641,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098879.349, "dur": 12.106, + "args": { + "External id": 2941642,"Record function id": 0, "Sequence number": 29294712, "Fwd thread id": 1, "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016098882.311, "dur": 6.883, + "args": { + "External id": 2941643,"Sequence number": 29294712, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 714 + } + }, + { + "ph": "f", "id": 76, "pid": 1336755, "tid": 1381158, "ts": 1555016098882.311, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016098883.473, "dur": 5.493, + "args": { + "External id": 2941644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016098884.091, "dur": 4.336, + "args": { + "External id": 2941645,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016098887.749, "dur": 0.546, + "args": { + "External id": 2941646,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016098895.135, "dur": 4.762, + "args": { + "External id": 2941647,"Record function id": 0, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016098896.248, "dur": 3.145, + "args": { + "External id": 2941648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098897.228, "dur": 1.728, + "args": { + "External id": 2941649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016098897.641, "dur": 1.227, + "args": { + "External id": 2941650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016098905.427, "dur": 485.806, + "args": { + "External id": 2941651,"Record function id": 0, "Sequence number": 29294711, "Fwd thread id": 1, "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016098906.691, "dur": 445.946, + "args": { + "External id": 2941652,"Sequence number": 29294711, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 723 + } + }, + { + "ph": "f", "id": 77, "pid": 1336755, "tid": 1381158, "ts": 1555016098906.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1381158, + "ts": 1555016098926.958, "dur": 34.494, + "args": { + "External id": 2941653,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016098928.172, "dur": 33.087, + "args": { + "External id": 2941654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016098930.534, "dur": 7.111, + "args": { + "External id": 2941655,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016098934.674, "dur": 2.488, + "args": { + "External id": 2941656,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016098938.902, "dur": 21.942, + "args": { + "External id": 2941657,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098974.885, "dur": 1.768, + "args": { + "External id": 2941658,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098975.390, "dur": 1.146, + "args": { + "External id": 2941659,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016098980.413, "dur": 41.845, + "args": { + "External id": 2941660,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016098981.229, "dur": 40.534, + "args": { + "External id": 2941661,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016099038.293, "dur": 3.346, + "args": { + "External id": 2941662,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016099052.145, "dur": 2.067, + "args": { + "External id": 2941663,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099238.967, "dur": 3.256, + "args": { + "External id": 2941664,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016099247.274, "dur": 34.077, + "args": { + "External id": 2941665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099256.426, "dur": 0.780, + "args": { + "External id": 2941666,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016099286.774, "dur": 32.547, + "args": { + "External id": 2941667,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016099288.505, "dur": 30.609, + "args": { + "External id": 2941668,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099294.643, "dur": 4.381, + "args": { + "External id": 2941669,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016099302.522, "dur": 16.016, + "args": { + "External id": 2941670,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016099323.291, "dur": 1.901, + "args": { + "External id": 2941671,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099324.139, "dur": 0.919, + "args": { + "External id": 2941672,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016099331.139, "dur": 1.851, + "args": { + "External id": 2941673,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099331.699, "dur": 1.145, + "args": { + "External id": 2941674,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016099336.916, "dur": 4.414, + "args": { + "External id": 2941675,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099337.492, "dur": 3.738, + "args": { + "External id": 2941676,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016099371.996, "dur": 17.775, + "args": { + "External id": 2941677,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016099404.467, "dur": 7.683, + "args": { + "External id": 2941678,"Record function id": 0, "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016099406.099, "dur": 5.416, + "args": { + "External id": 2941679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016099407.955, "dur": 2.545, + "args": { + "External id": 2941680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016099408.738, "dur": 1.651, + "args": { + "External id": 2941681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099416.010, "dur": 7.886, + "args": { + "External id": 2941682,"Record function id": 0, "Sequence number": 29294710, "Fwd thread id": 1, "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099417.150, "dur": 3.593, + "args": { + "External id": 2941683,"Sequence number": 29294710, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 754 + } + }, + { + "ph": "f", "id": 78, "pid": 1336755, "tid": 1381158, "ts": 1555016099417.150, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016099418.700, "dur": 1.814, + "args": { + "External id": 2941684,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099419.366, "dur": 1.041, + "args": { + "External id": 2941685,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099429.259, "dur": 135.634, + "args": { + "External id": 2941686,"Record function id": 0, "Sequence number": 29294709, "Fwd thread id": 1, "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099430.500, "dur": 126.587, + "args": { + "External id": 2941687,"Sequence number": 29294709, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 758 + } + }, + { + "ph": "f", "id": 79, "pid": 1336755, "tid": 1381158, "ts": 1555016099430.500, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016099434.475, "dur": 6.354, + "args": { + "External id": 2941688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016099435.794, "dur": 4.443, + "args": { + "External id": 2941689,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099439.283, "dur": 0.744, + "args": { + "External id": 2941690,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016099441.998, "dur": 57.430, + "args": { + "External id": 2941691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016099500.906, "dur": 8.739, + "args": { + "External id": 2941692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016099504.071, "dur": 4.987, + "args": { + "External id": 2941693,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099505.240, "dur": 3.658, + "args": { + "External id": 2941694,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016099512.053, "dur": 3.053, + "args": { + "External id": 2941695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016099513.225, "dur": 1.412, + "args": { + "External id": 2941696,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099514.126, "dur": 0.446, + "args": { + "External id": 2941697,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016099515.570, "dur": 40.608, + "args": { + "External id": 2941698,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099569.312, "dur": 14.498, + "args": { + "External id": 2941699,"Record function id": 0, "Sequence number": 29294708, "Fwd thread id": 1, "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099572.450, "dur": 8.123, + "args": { + "External id": 2941700,"Sequence number": 29294708, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 771 + } + }, + { + "ph": "f", "id": 80, "pid": 1336755, "tid": 1381158, "ts": 1555016099572.450, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016099576.166, "dur": 4.251, + "args": { + "External id": 2941701,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099576.606, "dur": 3.662, + "args": { + "External id": 2941702,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099589.719, "dur": 6.629, + "args": { + "External id": 2941703,"Record function id": 0, "Sequence number": 29294707, "Fwd thread id": 1, "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099590.653, "dur": 3.618, + "args": { + "External id": 2941704,"Sequence number": 29294707, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 775 + } + }, + { + "ph": "f", "id": 81, "pid": 1336755, "tid": 1381158, "ts": 1555016099590.653, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016099591.654, "dur": 2.383, + "args": { + "External id": 2941705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016099592.226, "dur": 1.333, + "args": { + "External id": 2941706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099592.979, "dur": 0.489, + "args": { + "External id": 2941707,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016099600.385, "dur": 7.303, + "args": { + "External id": 2941708,"Record function id": 0, "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016099601.970, "dur": 5.220, + "args": { + "External id": 2941709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016099602.961, "dur": 3.998, + "args": { + "External id": 2941710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016099605.750, "dur": 1.105, + "args": { + "External id": 2941711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099611.065, "dur": 9.669, + "args": { + "External id": 2941712,"Record function id": 0, "Sequence number": 29294706, "Fwd thread id": 1, "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016099611.872, "dur": 5.307, + "args": { + "External id": 2941713,"Sequence number": 29294706, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 784 + } + }, + { + "ph": "f", "id": 82, "pid": 1336755, "tid": 1381158, "ts": 1555016099611.872, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016099613.205, "dur": 3.813, + "args": { + "External id": 2941714,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016099615.854, "dur": 1.047, + "args": { + "External id": 2941715,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016099624.805, "dur": 295.335, + "args": { + "External id": 2941716,"Record function id": 0, "Sequence number": 29294705, "Fwd thread id": 1, "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016099625.749, "dur": 269.854, + "args": { + "External id": 2941717,"Sequence number": 29294705, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 788 + } + }, + { + "ph": "f", "id": 83, "pid": 1336755, "tid": 1381158, "ts": 1555016099625.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016099643.618, "dur": 5.254, + "args": { + "External id": 2941718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099645.400, "dur": 3.014, + "args": { + "External id": 2941719,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016099651.163, "dur": 4.913, + "args": { + "External id": 2941720,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099653.870, "dur": 1.979, + "args": { + "External id": 2941721,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016099658.078, "dur": 5.290, + "args": { + "External id": 2941722,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099659.210, "dur": 3.975, + "args": { + "External id": 2941723,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016099689.047, "dur": 181.887, + "args": { + "External id": 2941724,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016099778.673, "dur": 3.244, + "args": { + "External id": 2941725,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016099783.616, "dur": 4.293, + "args": { + "External id": 2941726,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016099883.019, "dur": 3.260, + "args": { + "External id": 2941727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016099889.127, "dur": 0.965, + "args": { + "External id": 2941728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016099891.895, "dur": 0.529, + "args": { + "External id": 2941729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016099926.701, "dur": 290.460, + "args": { + "External id": 2941730,"Record function id": 0, "Sequence number": 29294704, "Fwd thread id": 1, "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016099927.909, "dur": 279.659, + "args": { + "External id": 2941731,"Sequence number": 29294704, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 802 + } + }, + { + "ph": "f", "id": 84, "pid": 1336755, "tid": 1381158, "ts": 1555016099927.909, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016099947.144, "dur": 82.985, + "args": { + "External id": 2941732,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016099949.475, "dur": 2.057, + "args": { + "External id": 2941733,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016099952.643, "dur": 76.365, + "args": { + "External id": 2941734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016100042.462, "dur": 7.704, + "args": { + "External id": 2941735,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100046.752, "dur": 3.054, + "args": { + "External id": 2941736,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016100226.983, "dur": 158.515, + "args": { + "External id": 2941737,"Record function id": 0, "Sequence number": 29294703, "Fwd thread id": 1, "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016100228.789, "dur": 149.863, + "args": { + "External id": 2941738,"Sequence number": 29294703, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 809 + } + }, + { + "ph": "f", "id": 85, "pid": 1336755, "tid": 1381158, "ts": 1555016100228.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016100241.725, "dur": 35.627, + "args": { + "External id": 2941739,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100244.154, "dur": 3.080, + "args": { + "External id": 2941740,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016100248.327, "dur": 28.490, + "args": { + "External id": 2941741,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016100284.456, "dur": 7.561, + "args": { + "External id": 2941742,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100290.023, "dur": 1.662, + "args": { + "External id": 2941743,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100391.455, "dur": 17.853, + "args": { + "External id": 2941744,"Record function id": 0, "Sequence number": 29294702, "Fwd thread id": 1, "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100395.180, "dur": 10.317, + "args": { + "External id": 2941745,"Sequence number": 29294702, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 816 + } + }, + { + "ph": "f", "id": 86, "pid": 1336755, "tid": 1381158, "ts": 1555016100395.180, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100397.514, "dur": 7.701, + "args": { + "External id": 2941746,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100398.766, "dur": 6.254, + "args": { + "External id": 2941747,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100412.775, "dur": 8.417, + "args": { + "External id": 2941748,"Record function id": 0, "Sequence number": 29294701, "Fwd thread id": 1, "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100413.699, "dur": 4.959, + "args": { + "External id": 2941749,"Sequence number": 29294701, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 820 + } + }, + { + "ph": "f", "id": 87, "pid": 1336755, "tid": 1381158, "ts": 1555016100413.699, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100414.731, "dur": 3.770, + "args": { + "External id": 2941750,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100417.440, "dur": 0.915, + "args": { + "External id": 2941751,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100426.864, "dur": 8.608, + "args": { + "External id": 2941752,"Record function id": 0, "Sequence number": 29294700, "Fwd thread id": 1, "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100428.085, "dur": 5.020, + "args": { + "External id": 2941753,"Sequence number": 29294700, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 824 + } + }, + { + "ph": "f", "id": 88, "pid": 1336755, "tid": 1381158, "ts": 1555016100428.085, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100429.217, "dur": 3.745, + "args": { + "External id": 2941754,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100432.026, "dur": 0.811, + "args": { + "External id": 2941755,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100438.617, "dur": 9.433, + "args": { + "External id": 2941756,"Record function id": 0, "Sequence number": 29294699, "Fwd thread id": 1, "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100439.528, "dur": 5.579, + "args": { + "External id": 2941757,"Sequence number": 29294699, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 828 + } + }, + { + "ph": "f", "id": 89, "pid": 1336755, "tid": 1381158, "ts": 1555016100439.528, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100441.280, "dur": 3.675, + "args": { + "External id": 2941758,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100444.003, "dur": 0.810, + "args": { + "External id": 2941759,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100451.428, "dur": 151.617, + "args": { + "External id": 2941760,"Record function id": 0, "Sequence number": 29294698, "Fwd thread id": 1, "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100452.146, "dur": 142.068, + "args": { + "External id": 2941761,"Sequence number": 29294698, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 832 + } + }, + { + "ph": "f", "id": 90, "pid": 1336755, "tid": 1381158, "ts": 1555016100452.146, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100457.112, "dur": 8.087, + "args": { + "External id": 2941762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100458.841, "dur": 5.714, + "args": { + "External id": 2941763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100462.822, "dur": 1.442, + "args": { + "External id": 2941764,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016100466.965, "dur": 66.666, + "args": { + "External id": 2941765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100535.010, "dur": 3.889, + "args": { + "External id": 2941766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100536.045, "dur": 2.325, + "args": { + "External id": 2941767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100537.197, "dur": 1.011, + "args": { + "External id": 2941768,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100540.711, "dur": 7.624, + "args": { + "External id": 2941769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100542.083, "dur": 5.735, + "args": { + "External id": 2941770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100547.421, "dur": 0.306, + "args": { + "External id": 2941771,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016100549.101, "dur": 44.395, + "args": { + "External id": 2941772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100607.635, "dur": 7.675, + "args": { + "External id": 2941773,"Record function id": 0, "Sequence number": 29294697, "Fwd thread id": 1, "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100608.622, "dur": 3.731, + "args": { + "External id": 2941774,"Sequence number": 29294697, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 845 + } + }, + { + "ph": "f", "id": 91, "pid": 1336755, "tid": 1381158, "ts": 1555016100608.622, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100610.152, "dur": 2.068, + "args": { + "External id": 2941775,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100611.060, "dur": 1.045, + "args": { + "External id": 2941776,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100618.777, "dur": 10.136, + "args": { + "External id": 2941777,"Record function id": 0, "Sequence number": 29294696, "Fwd thread id": 1, "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100619.825, "dur": 6.967, + "args": { + "External id": 2941778,"Sequence number": 29294696, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 849 + } + }, + { + "ph": "f", "id": 92, "pid": 1336755, "tid": 1381158, "ts": 1555016100619.825, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100621.178, "dur": 5.384, + "args": { + "External id": 2941779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100624.118, "dur": 1.929, + "args": { + "External id": 2941780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100625.139, "dur": 0.818, + "args": { + "External id": 2941781,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016100636.431, "dur": 7.951, + "args": { + "External id": 2941782,"Record function id": 0, "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016100637.688, "dur": 5.991, + "args": { + "External id": 2941783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016100639.958, "dur": 3.340, + "args": { + "External id": 2941784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016100640.873, "dur": 2.311, + "args": { + "External id": 2941785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100647.536, "dur": 6.114, + "args": { + "External id": 2941786,"Record function id": 0, "Sequence number": 29294695, "Fwd thread id": 1, "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100648.582, "dur": 2.909, + "args": { + "External id": 2941787,"Sequence number": 29294695, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 858 + } + }, + { + "ph": "f", "id": 93, "pid": 1336755, "tid": 1381158, "ts": 1555016100648.582, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100650.103, "dur": 1.253, + "args": { + "External id": 2941788,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100650.526, "dur": 0.698, + "args": { + "External id": 2941789,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100659.047, "dur": 113.287, + "args": { + "External id": 2941790,"Record function id": 0, "Sequence number": 29294694, "Fwd thread id": 1, "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100660.046, "dur": 105.091, + "args": { + "External id": 2941791,"Sequence number": 29294694, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 862 + } + }, + { + "ph": "f", "id": 94, "pid": 1336755, "tid": 1381158, "ts": 1555016100660.046, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100662.983, "dur": 2.593, + "args": { + "External id": 2941792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100663.863, "dur": 1.312, + "args": { + "External id": 2941793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100664.720, "dur": 0.358, + "args": { + "External id": 2941794,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016100666.181, "dur": 27.780, + "args": { + "External id": 2941795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100694.868, "dur": 9.529, + "args": { + "External id": 2941796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100698.626, "dur": 5.260, + "args": { + "External id": 2941797,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100702.494, "dur": 1.283, + "args": { + "External id": 2941798,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100705.923, "dur": 4.985, + "args": { + "External id": 2941799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100706.940, "dur": 3.390, + "args": { + "External id": 2941800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100707.546, "dur": 2.659, + "args": { + "External id": 2941801,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016100711.328, "dur": 52.996, + "args": { + "External id": 2941802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100776.783, "dur": 36.665, + "args": { + "External id": 2941803,"Record function id": 0, "Sequence number": 29294693, "Fwd thread id": 1, "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100780.435, "dur": 6.058, + "args": { + "External id": 2941804,"Sequence number": 29294693, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 875 + } + }, + { + "ph": "f", "id": 95, "pid": 1336755, "tid": 1381158, "ts": 1555016100780.435, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100782.110, "dur": 4.206, + "args": { + "External id": 2941805,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100785.195, "dur": 0.973, + "args": { + "External id": 2941806,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016100789.338, "dur": 22.076, + "args": { + "External id": 2941807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100817.084, "dur": 10.362, + "args": { + "External id": 2941808,"Record function id": 0, "Sequence number": 29294692, "Fwd thread id": 1, "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100818.425, "dur": 6.206, + "args": { + "External id": 2941809,"Sequence number": 29294692, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 880 + } + }, + { + "ph": "f", "id": 96, "pid": 1336755, "tid": 1381158, "ts": 1555016100818.425, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100819.525, "dur": 4.915, + "args": { + "External id": 2941810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100820.403, "dur": 3.533, + "args": { + "External id": 2941811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100823.382, "dur": 0.466, + "args": { + "External id": 2941812,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016100831.357, "dur": 5.237, + "args": { + "External id": 2941813,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016100832.479, "dur": 3.621, + "args": { + "External id": 2941814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016100833.639, "dur": 2.036, + "args": { + "External id": 2941815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016100834.330, "dur": 1.227, + "args": { + "External id": 2941816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100839.872, "dur": 8.975, + "args": { + "External id": 2941817,"Record function id": 0, "Sequence number": 29294691, "Fwd thread id": 1, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100840.880, "dur": 5.539, + "args": { + "External id": 2941818,"Sequence number": 29294691, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 889 + } + }, + { + "ph": "f", "id": 97, "pid": 1336755, "tid": 1381158, "ts": 1555016100840.880, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016100842.373, "dur": 3.903, + "args": { + "External id": 2941819,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016100845.472, "dur": 0.677, + "args": { + "External id": 2941820,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100854.480, "dur": 178.104, + "args": { + "External id": 2941821,"Record function id": 0, "Sequence number": 29294690, "Fwd thread id": 1, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016100857.179, "dur": 163.562, + "args": { + "External id": 2941822,"Sequence number": 29294690, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 893 + } + }, + { + "ph": "f", "id": 98, "pid": 1336755, "tid": 1381158, "ts": 1555016100857.179, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100860.629, "dur": 2.327, + "args": { + "External id": 2941823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100861.038, "dur": 1.507, + "args": { + "External id": 2941824,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100861.872, "dur": 0.573, + "args": { + "External id": 2941825,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016100863.624, "dur": 28.346, + "args": { + "External id": 2941826,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100892.918, "dur": 5.551, + "args": { + "External id": 2941827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100893.371, "dur": 4.639, + "args": { + "External id": 2941828,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100896.441, "dur": 1.448, + "args": { + "External id": 2941829,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016100899.875, "dur": 7.534, + "args": { + "External id": 2941830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016100903.604, "dur": 3.417, + "args": { + "External id": 2941831,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016100906.578, "dur": 0.378, + "args": { + "External id": 2941832,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016100907.930, "dur": 111.403, + "args": { + "External id": 2941833,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101039.646, "dur": 31.648, + "args": { + "External id": 2941834,"Record function id": 0, "Sequence number": 29294689, "Fwd thread id": 1, "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101040.937, "dur": 6.530, + "args": { + "External id": 2941835,"Sequence number": 29294689, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 906 + } + }, + { + "ph": "f", "id": 99, "pid": 1336755, "tid": 1381158, "ts": 1555016101040.937, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101044.816, "dur": 2.508, + "args": { + "External id": 2941836,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101045.347, "dur": 1.884, + "args": { + "External id": 2941837,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016101049.833, "dur": 19.100, + "args": { + "External id": 2941838,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101074.955, "dur": 9.484, + "args": { + "External id": 2941839,"Record function id": 0, "Sequence number": 29294688, "Fwd thread id": 1, "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101076.151, "dur": 6.214, + "args": { + "External id": 2941840,"Sequence number": 29294688, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 911 + } + }, + { + "ph": "f", "id": 100, "pid": 1336755, "tid": 1381158, "ts": 1555016101076.151, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016101077.251, "dur": 4.915, + "args": { + "External id": 2941841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016101078.120, "dur": 3.493, + "args": { + "External id": 2941842,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101081.068, "dur": 0.426, + "args": { + "External id": 2941843,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016101088.609, "dur": 7.456, + "args": { + "External id": 2941844,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016101089.667, "dur": 5.865, + "args": { + "External id": 2941845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016101091.225, "dur": 3.832, + "args": { + "External id": 2941846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016101093.703, "dur": 1.247, + "args": { + "External id": 2941847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016101100.125, "dur": 387.138, + "args": { + "External id": 2941848,"Record function id": 0, "Sequence number": 29294687, "Fwd thread id": 1, "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016101101.268, "dur": 353.430, + "args": { + "External id": 2941849,"Sequence number": 29294687, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 920 + } + }, + { + "ph": "f", "id": 101, "pid": 1336755, "tid": 1381158, "ts": 1555016101101.268, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101136.149, "dur": 1.620, + "args": { + "External id": 2941850,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101136.722, "dur": 0.894, + "args": { + "External id": 2941851,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016101166.317, "dur": 6.873, + "args": { + "External id": 2941852,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016101184.666, "dur": 1.766, + "args": { + "External id": 2941853,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101337.408, "dur": 2.715, + "args": { + "External id": 2941854,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016101344.071, "dur": 36.998, + "args": { + "External id": 2941855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101354.812, "dur": 0.985, + "args": { + "External id": 2941856,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016101391.700, "dur": 38.887, + "args": { + "External id": 2941857,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016101393.527, "dur": 36.858, + "args": { + "External id": 2941858,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101399.647, "dur": 6.103, + "args": { + "External id": 2941859,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016101407.359, "dur": 22.372, + "args": { + "External id": 2941860,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016101436.971, "dur": 2.105, + "args": { + "External id": 2941861,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101437.898, "dur": 1.036, + "args": { + "External id": 2941862,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101445.743, "dur": 1.728, + "args": { + "External id": 2941863,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101446.261, "dur": 1.110, + "args": { + "External id": 2941864,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016101465.823, "dur": 17.305, + "args": { + "External id": 2941865,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016101498.533, "dur": 13.979, + "args": { + "External id": 2941866,"Record function id": 0, "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016101502.992, "dur": 8.661, + "args": { + "External id": 2941867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016101504.938, "dur": 5.791, + "args": { + "External id": 2941868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016101508.601, "dur": 2.005, + "args": { + "External id": 2941869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101516.666, "dur": 6.743, + "args": { + "External id": 2941870,"Record function id": 0, "Sequence number": 29294686, "Fwd thread id": 1, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101518.305, "dur": 1.121, + "args": { + "External id": 2941871,"Sequence number": 29294686, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 942 + } + }, + { + "ph": "f", "id": 102, "pid": 1336755, "tid": 1381158, "ts": 1555016101518.305, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016101527.295, "dur": 424.323, + "args": { + "External id": 2941872,"Record function id": 0, "Sequence number": 29294685, "Fwd thread id": 1, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016101528.282, "dur": 409.521, + "args": { + "External id": 2941873,"Sequence number": 29294685, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 944 + } + }, + { + "ph": "f", "id": 103, "pid": 1336755, "tid": 1381158, "ts": 1555016101528.282, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101557.635, "dur": 7.145, + "args": { + "External id": 2941874,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016101561.885, "dur": 2.581, + "args": { + "External id": 2941875,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016101568.698, "dur": 10.722, + "args": { + "External id": 2941876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016101572.286, "dur": 6.524, + "args": { + "External id": 2941877,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101575.452, "dur": 3.195, + "args": { + "External id": 2941878,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1381158, + "ts": 1555016101582.814, "dur": 102.283, + "args": { + "External id": 2941879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016101583.772, "dur": 2.553, + "args": { + "External id": 2941880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016101584.582, "dur": 1.249, + "args": { + "External id": 2941881,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101585.174, "dur": 0.566, + "args": { + "External id": 2941882,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1381158, + "ts": 1555016101589.729, "dur": 94.894, + "args": { + "External id": 2941883,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016101590.973, "dur": 92.623, + "args": { + "External id": 2941884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016101688.425, "dur": 2.413, + "args": { + "External id": 2941885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101689.210, "dur": 1.488, + "args": { + "External id": 2941886,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016101718.781, "dur": 5.322, + "args": { + "External id": 2941887,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016101727.148, "dur": 2.250, + "args": { + "External id": 2941888,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016101730.265, "dur": 1.652, + "args": { + "External id": 2941889,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101765.605, "dur": 1.841, + "args": { + "External id": 2941890,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101766.127, "dur": 1.160, + "args": { + "External id": 2941891,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336755, "tid": 1381158, + "ts": 1555016101787.642, "dur": 132.914, + "args": { + "External id": 2941892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016101792.627, "dur": 8.667, + "args": { + "External id": 2941893,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101797.604, "dur": 2.736, + "args": { + "External id": 2941894,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016101802.576, "dur": 8.286, + "args": { + "External id": 2941895,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101809.112, "dur": 0.675, + "args": { + "External id": 2941896,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016101812.355, "dur": 1.714, + "args": { + "External id": 2941897,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101813.210, "dur": 0.560, + "args": { + "External id": 2941898,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016101817.295, "dur": 1.748, + "args": { + "External id": 2941899,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101818.042, "dur": 0.430, + "args": { + "External id": 2941900,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016101826.001, "dur": 1.899, + "args": { + "External id": 2941901,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101827.028, "dur": 0.587, + "args": { + "External id": 2941902,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101828.923, "dur": 6.854, + "args": { + "External id": 2941903,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016101833.713, "dur": 1.863, + "args": { + "External id": 2941904,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016101836.360, "dur": 4.381, + "args": { + "External id": 2941905,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101839.695, "dur": 0.805, + "args": { + "External id": 2941906,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101841.327, "dur": 4.261, + "args": { + "External id": 2941907,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101841.667, "dur": 3.826, + "args": { + "External id": 2941908,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016101846.940, "dur": 58.484, + "args": { + "External id": 2941909,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101910.762, "dur": 1.102, + "args": { + "External id": 2941910,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016101912.570, "dur": 3.876, + "args": { + "External id": 2941911,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016101915.393, "dur": 0.488, + "args": { + "External id": 2941912,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101918.594, "dur": 0.892, + "args": { + "External id": 2941913,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016101960.050, "dur": 7.933, + "args": { + "External id": 2941914,"Record function id": 0, "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016101962.191, "dur": 5.129, + "args": { + "External id": 2941915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016101963.663, "dur": 2.606, + "args": { + "External id": 2941916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016101964.664, "dur": 1.486, + "args": { + "External id": 2941917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101972.027, "dur": 7.040, + "args": { + "External id": 2941918,"Record function id": 0, "Sequence number": 29294684, "Fwd thread id": 1, "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016101973.081, "dur": 3.405, + "args": { + "External id": 2941919,"Sequence number": 29294684, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 990 + } + }, + { + "ph": "f", "id": 104, "pid": 1336755, "tid": 1381158, "ts": 1555016101973.081, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016101974.819, "dur": 1.485, + "args": { + "External id": 2941920,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016101975.485, "dur": 0.682, + "args": { + "External id": 2941921,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102025.576, "dur": 161.448, + "args": { + "External id": 2941922,"Record function id": 0, "Sequence number": 29294683, "Fwd thread id": 1, "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102028.047, "dur": 148.872, + "args": { + "External id": 2941923,"Sequence number": 29294683, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 994 + } + }, + { + "ph": "f", "id": 105, "pid": 1336755, "tid": 1381158, "ts": 1555016102028.047, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102033.433, "dur": 4.717, + "args": { + "External id": 2941924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102034.815, "dur": 2.669, + "args": { + "External id": 2941925,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102036.202, "dur": 0.927, + "args": { + "External id": 2941926,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016102039.239, "dur": 51.132, + "args": { + "External id": 2941927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102091.594, "dur": 8.332, + "args": { + "External id": 2941928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102092.418, "dur": 6.411, + "args": { + "External id": 2941929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102097.977, "dur": 0.702, + "args": { + "External id": 2941930,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102101.589, "dur": 4.873, + "args": { + "External id": 2941931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102102.698, "dur": 3.148, + "args": { + "External id": 2941932,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102103.565, "dur": 2.214, + "args": { + "External id": 2941933,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016102106.995, "dur": 68.363, + "args": { + "External id": 2941934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102194.148, "dur": 12.960, + "args": { + "External id": 2941935,"Record function id": 0, "Sequence number": 29294682, "Fwd thread id": 1, "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102195.666, "dur": 9.758, + "args": { + "External id": 2941936,"Sequence number": 29294682, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1007 + } + }, + { + "ph": "f", "id": 106, "pid": 1336755, "tid": 1381158, "ts": 1555016102195.666, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102197.459, "dur": 7.786, + "args": { + "External id": 2941937,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102203.338, "dur": 1.816, + "args": { + "External id": 2941938,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102210.457, "dur": 8.297, + "args": { + "External id": 2941939,"Record function id": 0, "Sequence number": 29294681, "Fwd thread id": 1, "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102211.387, "dur": 4.585, + "args": { + "External id": 2941940,"Sequence number": 29294681, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1011 + } + }, + { + "ph": "f", "id": 107, "pid": 1336755, "tid": 1381158, "ts": 1555016102211.387, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102212.828, "dur": 2.925, + "args": { + "External id": 2941941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102213.581, "dur": 1.656, + "args": { + "External id": 2941942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102214.641, "dur": 0.480, + "args": { + "External id": 2941943,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016102222.768, "dur": 5.669, + "args": { + "External id": 2941944,"Record function id": 0, "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016102224.039, "dur": 3.902, + "args": { + "External id": 2941945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016102225.182, "dur": 2.482, + "args": { + "External id": 2941946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016102225.953, "dur": 1.644, + "args": { + "External id": 2941947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102233.768, "dur": 11.211, + "args": { + "External id": 2941948,"Record function id": 0, "Sequence number": 29294680, "Fwd thread id": 1, "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102234.645, "dur": 7.701, + "args": { + "External id": 2941949,"Sequence number": 29294680, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 1020 + } + }, + { + "ph": "f", "id": 108, "pid": 1336755, "tid": 1381158, "ts": 1555016102234.645, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102238.139, "dur": 4.050, + "args": { + "External id": 2941950,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102241.403, "dur": 0.682, + "args": { + "External id": 2941951,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102248.254, "dur": 143.308, + "args": { + "External id": 2941952,"Record function id": 0, "Sequence number": 29294679, "Fwd thread id": 1, "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102271.731, "dur": 110.780, + "args": { + "External id": 2941953,"Sequence number": 29294679, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1024 + } + }, + { + "ph": "f", "id": 109, "pid": 1336755, "tid": 1381158, "ts": 1555016102271.731, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102274.929, "dur": 4.393, + "args": { + "External id": 2941954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102275.363, "dur": 3.511, + "args": { + "External id": 2941955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102278.151, "dur": 0.615, + "args": { + "External id": 2941956,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016102280.008, "dur": 37.944, + "args": { + "External id": 2941957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102319.049, "dur": 7.163, + "args": { + "External id": 2941958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102319.827, "dur": 5.865, + "args": { + "External id": 2941959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102320.751, "dur": 4.776, + "args": { + "External id": 2941960,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102327.889, "dur": 6.147, + "args": { + "External id": 2941961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102328.840, "dur": 4.791, + "args": { + "External id": 2941962,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102333.171, "dur": 0.358, + "args": { + "External id": 2941963,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016102334.783, "dur": 47.010, + "args": { + "External id": 2941964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102396.125, "dur": 31.382, + "args": { + "External id": 2941965,"Record function id": 0, "Sequence number": 29294678, "Fwd thread id": 1, "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102397.134, "dur": 3.266, + "args": { + "External id": 2941966,"Sequence number": 29294678, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1037 + } + }, + { + "ph": "f", "id": 110, "pid": 1336755, "tid": 1381158, "ts": 1555016102397.134, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102398.593, "dur": 1.652, + "args": { + "External id": 2941967,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102399.067, "dur": 1.070, + "args": { + "External id": 2941968,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016102403.024, "dur": 21.222, + "args": { + "External id": 2941969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102431.241, "dur": 12.463, + "args": { + "External id": 2941970,"Record function id": 0, "Sequence number": 29294677, "Fwd thread id": 1, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102432.088, "dur": 9.278, + "args": { + "External id": 2941971,"Sequence number": 29294677, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1042 + } + }, + { + "ph": "f", "id": 111, "pid": 1336755, "tid": 1381158, "ts": 1555016102432.088, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102435.463, "dur": 5.688, + "args": { + "External id": 2941972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102436.250, "dur": 4.302, + "args": { + "External id": 2941973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102439.806, "dur": 0.660, + "args": { + "External id": 2941974,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016102447.495, "dur": 4.791, + "args": { + "External id": 2941975,"Record function id": 0, "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016102448.642, "dur": 2.981, + "args": { + "External id": 2941976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016102449.504, "dur": 1.482, + "args": { + "External id": 2941977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016102449.964, "dur": 0.948, + "args": { + "External id": 2941978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016102456.152, "dur": 413.341, + "args": { + "External id": 2941979,"Record function id": 0, "Sequence number": 29294676, "Fwd thread id": 1, "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016102457.478, "dur": 376.531, + "args": { + "External id": 2941980,"Sequence number": 29294676, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 1051 + } + }, + { + "ph": "f", "id": 112, "pid": 1336755, "tid": 1381158, "ts": 1555016102457.478, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1381158, + "ts": 1555016102480.850, "dur": 33.164, + "args": { + "External id": 2941981,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016102482.283, "dur": 31.547, + "args": { + "External id": 2941982,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016102484.730, "dur": 5.059, + "args": { + "External id": 2941983,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016102486.806, "dur": 2.502, + "args": { + "External id": 2941984,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016102491.087, "dur": 22.065, + "args": { + "External id": 2941985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102529.076, "dur": 3.900, + "args": { + "External id": 2941986,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102529.693, "dur": 3.107, + "args": { + "External id": 2941987,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102536.799, "dur": 1.217, + "args": { + "External id": 2941988,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102537.175, "dur": 0.731, + "args": { + "External id": 2941989,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016102551.852, "dur": 2.021, + "args": { + "External id": 2941990,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016102563.853, "dur": 2.098, + "args": { + "External id": 2941991,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102724.901, "dur": 2.279, + "args": { + "External id": 2941992,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016102733.833, "dur": 29.753, + "args": { + "External id": 2941993,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102741.688, "dur": 0.557, + "args": { + "External id": 2941994,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016102768.900, "dur": 32.510, + "args": { + "External id": 2941995,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016102770.319, "dur": 30.888, + "args": { + "External id": 2941996,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102776.530, "dur": 3.934, + "args": { + "External id": 2941997,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016102783.681, "dur": 16.973, + "args": { + "External id": 2941998,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016102805.048, "dur": 2.469, + "args": { + "External id": 2941999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102806.063, "dur": 1.287, + "args": { + "External id": 2942000,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102813.333, "dur": 3.656, + "args": { + "External id": 2942001,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102813.956, "dur": 2.923, + "args": { + "External id": 2942002,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102818.804, "dur": 3.873, + "args": { + "External id": 2942003,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102821.579, "dur": 1.005, + "args": { + "External id": 2942004,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016102851.736, "dur": 16.253, + "args": { + "External id": 2942005,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016102878.576, "dur": 7.133, + "args": { + "External id": 2942006,"Record function id": 0, "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016102880.620, "dur": 4.416, + "args": { + "External id": 2942007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016102882.046, "dur": 2.204, + "args": { + "External id": 2942008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016102882.656, "dur": 1.504, + "args": { + "External id": 2942009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102889.330, "dur": 7.148, + "args": { + "External id": 2942010,"Record function id": 0, "Sequence number": 29294675, "Fwd thread id": 1, "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102890.585, "dur": 3.379, + "args": { + "External id": 2942011,"Sequence number": 29294675, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1082 + } + }, + { + "ph": "f", "id": 113, "pid": 1336755, "tid": 1381158, "ts": 1555016102890.585, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016102892.151, "dur": 1.658, + "args": { + "External id": 2942012,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016102892.789, "dur": 0.893, + "args": { + "External id": 2942013,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102902.065, "dur": 167.185, + "args": { + "External id": 2942014,"Record function id": 0, "Sequence number": 29294674, "Fwd thread id": 1, "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016102903.451, "dur": 155.968, + "args": { + "External id": 2942015,"Sequence number": 29294674, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1086 + } + }, + { + "ph": "f", "id": 114, "pid": 1336755, "tid": 1381158, "ts": 1555016102903.451, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102906.544, "dur": 3.272, + "args": { + "External id": 2942016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102907.653, "dur": 1.700, + "args": { + "External id": 2942017,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102908.627, "dur": 0.529, + "args": { + "External id": 2942018,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016102910.917, "dur": 48.382, + "args": { + "External id": 2942019,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102960.509, "dur": 9.155, + "args": { + "External id": 2942020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102963.402, "dur": 5.714, + "args": { + "External id": 2942021,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102965.884, "dur": 3.073, + "args": { + "External id": 2942022,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016102971.103, "dur": 3.112, + "args": { + "External id": 2942023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016102972.657, "dur": 1.139, + "args": { + "External id": 2942024,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016102973.208, "dur": 0.525, + "args": { + "External id": 2942025,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016102974.909, "dur": 83.046, + "args": { + "External id": 2942026,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103076.271, "dur": 14.941, + "args": { + "External id": 2942027,"Record function id": 0, "Sequence number": 29294673, "Fwd thread id": 1, "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103077.553, "dur": 11.406, + "args": { + "External id": 2942028,"Sequence number": 29294673, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1099 + } + }, + { + "ph": "f", "id": 115, "pid": 1336755, "tid": 1381158, "ts": 1555016103077.553, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016103081.927, "dur": 6.858, + "args": { + "External id": 2942029,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016103084.846, "dur": 3.781, + "args": { + "External id": 2942030,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103097.339, "dur": 7.660, + "args": { + "External id": 2942031,"Record function id": 0, "Sequence number": 29294672, "Fwd thread id": 1, "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103098.342, "dur": 4.081, + "args": { + "External id": 2942032,"Sequence number": 29294672, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1103 + } + }, + { + "ph": "f", "id": 116, "pid": 1336755, "tid": 1381158, "ts": 1555016103098.342, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016103099.463, "dur": 2.784, + "args": { + "External id": 2942033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016103100.303, "dur": 1.462, + "args": { + "External id": 2942034,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103101.109, "dur": 0.495, + "args": { + "External id": 2942035,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016103109.251, "dur": 8.534, + "args": { + "External id": 2942036,"Record function id": 0, "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016103110.552, "dur": 6.653, + "args": { + "External id": 2942037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016103111.898, "dur": 5.019, + "args": { + "External id": 2942038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016103115.092, "dur": 1.722, + "args": { + "External id": 2942039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103122.711, "dur": 6.390, + "args": { + "External id": 2942040,"Record function id": 0, "Sequence number": 29294671, "Fwd thread id": 1, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103123.423, "dur": 3.416, + "args": { + "External id": 2942041,"Sequence number": 29294671, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1112 + } + }, + { + "ph": "f", "id": 117, "pid": 1336755, "tid": 1381158, "ts": 1555016103123.423, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016103124.714, "dur": 1.985, + "args": { + "External id": 2942042,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016103125.394, "dur": 1.160, + "args": { + "External id": 2942043,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103135.250, "dur": 324.701, + "args": { + "External id": 2942044,"Record function id": 0, "Sequence number": 29294670, "Fwd thread id": 1, "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103136.455, "dur": 297.600, + "args": { + "External id": 2942045,"Sequence number": 29294670, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1116 + } + }, + { + "ph": "f", "id": 118, "pid": 1336755, "tid": 1381158, "ts": 1555016103136.455, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016103172.470, "dur": 7.675, + "args": { + "External id": 2942046,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103174.739, "dur": 4.770, + "args": { + "External id": 2942047,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016103182.363, "dur": 6.782, + "args": { + "External id": 2942048,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103185.258, "dur": 3.683, + "args": { + "External id": 2942049,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016103199.604, "dur": 4.878, + "args": { + "External id": 2942050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103202.571, "dur": 1.726, + "args": { + "External id": 2942051,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103231.071, "dur": 176.423, + "args": { + "External id": 2942052,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016103311.189, "dur": 5.906, + "args": { + "External id": 2942053,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016103318.697, "dur": 4.091, + "args": { + "External id": 2942054,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016103420.026, "dur": 3.779, + "args": { + "External id": 2942055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016103427.066, "dur": 0.999, + "args": { + "External id": 2942056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016103430.125, "dur": 0.731, + "args": { + "External id": 2942057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103468.659, "dur": 224.704, + "args": { + "External id": 2942058,"Record function id": 0, "Sequence number": 29294669, "Fwd thread id": 1, "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103470.187, "dur": 215.581, + "args": { + "External id": 2942059,"Sequence number": 29294669, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1130 + } + }, + { + "ph": "f", "id": 119, "pid": 1336755, "tid": 1381158, "ts": 1555016103470.187, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016103489.683, "dur": 45.105, + "args": { + "External id": 2942060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103492.465, "dur": 2.779, + "args": { + "External id": 2942061,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016103496.645, "dur": 37.435, + "args": { + "External id": 2942062,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016103544.129, "dur": 6.393, + "args": { + "External id": 2942063,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103547.643, "dur": 2.575, + "args": { + "External id": 2942064,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103699.578, "dur": 159.428, + "args": { + "External id": 2942065,"Record function id": 0, "Sequence number": 29294668, "Fwd thread id": 1, "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016103701.124, "dur": 152.425, + "args": { + "External id": 2942066,"Sequence number": 29294668, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1137 + } + }, + { + "ph": "f", "id": 120, "pid": 1336755, "tid": 1381158, "ts": 1555016103701.124, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016103713.393, "dur": 44.472, + "args": { + "External id": 2942067,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103715.286, "dur": 2.343, + "args": { + "External id": 2942068,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016103722.944, "dur": 34.392, + "args": { + "External id": 2942069,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016103765.010, "dur": 5.362, + "args": { + "External id": 2942070,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103768.209, "dur": 1.856, + "args": { + "External id": 2942071,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103867.576, "dur": 15.469, + "args": { + "External id": 2942072,"Record function id": 0, "Sequence number": 29294667, "Fwd thread id": 1, "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103868.836, "dur": 10.997, + "args": { + "External id": 2942073,"Sequence number": 29294667, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 121, "pid": 1336755, "tid": 1381158, "ts": 1555016103868.836, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016103870.955, "dur": 8.598, + "args": { + "External id": 2942074,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016103871.891, "dur": 7.500, + "args": { + "External id": 2942075,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103886.613, "dur": 9.655, + "args": { + "External id": 2942076,"Record function id": 0, "Sequence number": 29294666, "Fwd thread id": 1, "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103887.751, "dur": 6.006, + "args": { + "External id": 2942077,"Sequence number": 29294666, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1148 + } + }, + { + "ph": "f", "id": 122, "pid": 1336755, "tid": 1381158, "ts": 1555016103887.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016103889.156, "dur": 4.468, + "args": { + "External id": 2942078,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016103892.478, "dur": 1.006, + "args": { + "External id": 2942079,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103899.521, "dur": 8.657, + "args": { + "External id": 2942080,"Record function id": 0, "Sequence number": 29294665, "Fwd thread id": 1, "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103900.265, "dur": 5.075, + "args": { + "External id": 2942081,"Sequence number": 29294665, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1152 + } + }, + { + "ph": "f", "id": 123, "pid": 1336755, "tid": 1381158, "ts": 1555016103900.265, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016103903.823, "dur": 1.380, + "args": { + "External id": 2942082,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016103904.308, "dur": 0.763, + "args": { + "External id": 2942083,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103911.317, "dur": 9.191, + "args": { + "External id": 2942084,"Record function id": 0, "Sequence number": 29294664, "Fwd thread id": 1, "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103912.162, "dur": 5.334, + "args": { + "External id": 2942085,"Sequence number": 29294664, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1156 + } + }, + { + "ph": "f", "id": 124, "pid": 1336755, "tid": 1381158, "ts": 1555016103912.162, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016103913.410, "dur": 3.954, + "args": { + "External id": 2942086,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016103916.448, "dur": 0.772, + "args": { + "External id": 2942087,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103923.870, "dur": 210.249, + "args": { + "External id": 2942088,"Record function id": 0, "Sequence number": 29294663, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016103924.697, "dur": 198.715, + "args": { + "External id": 2942089,"Sequence number": 29294663, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 125, "pid": 1336755, "tid": 1381158, "ts": 1555016103924.697, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016103928.955, "dur": 7.210, + "args": { + "External id": 2942090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016103932.488, "dur": 3.097, + "args": { + "External id": 2942091,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016103933.903, "dur": 1.384, + "args": { + "External id": 2942092,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016103937.650, "dur": 112.371, + "args": { + "External id": 2942093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104053.011, "dur": 5.798, + "args": { + "External id": 2942094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104054.366, "dur": 3.251, + "args": { + "External id": 2942095,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104056.237, "dur": 1.267, + "args": { + "External id": 2942096,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104061.180, "dur": 8.074, + "args": { + "External id": 2942097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104065.348, "dur": 3.316, + "args": { + "External id": 2942098,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104068.100, "dur": 0.495, + "args": { + "External id": 2942099,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016104070.160, "dur": 52.407, + "args": { + "External id": 2942100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104159.291, "dur": 10.490, + "args": { + "External id": 2942101,"Record function id": 0, "Sequence number": 29294662, "Fwd thread id": 1, "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104161.712, "dur": 5.394, + "args": { + "External id": 2942102,"Sequence number": 29294662, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1173 + } + }, + { + "ph": "f", "id": 126, "pid": 1336755, "tid": 1381158, "ts": 1555016104161.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104163.863, "dur": 3.084, + "args": { + "External id": 2942103,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104164.683, "dur": 2.014, + "args": { + "External id": 2942104,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104174.769, "dur": 9.744, + "args": { + "External id": 2942105,"Record function id": 0, "Sequence number": 29294661, "Fwd thread id": 1, "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104175.569, "dur": 6.324, + "args": { + "External id": 2942106,"Sequence number": 29294661, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1177 + } + }, + { + "ph": "f", "id": 127, "pid": 1336755, "tid": 1381158, "ts": 1555016104175.569, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104178.709, "dur": 2.984, + "args": { + "External id": 2942107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104179.477, "dur": 1.732, + "args": { + "External id": 2942108,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104180.411, "dur": 0.666, + "args": { + "External id": 2942109,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104192.936, "dur": 8.612, + "args": { + "External id": 2942110,"Record function id": 0, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104194.446, "dur": 6.262, + "args": { + "External id": 2942111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104196.796, "dur": 3.384, + "args": { + "External id": 2942112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104197.694, "dur": 2.378, + "args": { + "External id": 2942113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104204.613, "dur": 8.731, + "args": { + "External id": 2942114,"Record function id": 0, "Sequence number": 29294660, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104205.574, "dur": 5.001, + "args": { + "External id": 2942115,"Sequence number": 29294660, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 128, "pid": 1336755, "tid": 1381158, "ts": 1555016104205.574, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104206.866, "dur": 3.561, + "args": { + "External id": 2942116,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104209.534, "dur": 0.736, + "args": { + "External id": 2942117,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104216.732, "dur": 104.021, + "args": { + "External id": 2942118,"Record function id": 0, "Sequence number": 29294659, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104217.459, "dur": 96.392, + "args": { + "External id": 2942119,"Sequence number": 29294659, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 129, "pid": 1336755, "tid": 1381158, "ts": 1555016104217.459, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104220.495, "dur": 2.360, + "args": { + "External id": 2942120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104221.208, "dur": 1.240, + "args": { + "External id": 2942121,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104221.820, "dur": 0.504, + "args": { + "External id": 2942122,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016104224.071, "dur": 35.687, + "args": { + "External id": 2942123,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104263.284, "dur": 5.110, + "args": { + "External id": 2942124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104263.890, "dur": 4.006, + "args": { + "External id": 2942125,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104267.033, "dur": 0.741, + "args": { + "External id": 2942126,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104269.425, "dur": 4.771, + "args": { + "External id": 2942127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104270.192, "dur": 3.491, + "args": { + "External id": 2942128,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104270.950, "dur": 2.615, + "args": { + "External id": 2942129,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016104277.141, "dur": 36.060, + "args": { + "External id": 2942130,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104325.152, "dur": 35.653, + "args": { + "External id": 2942131,"Record function id": 0, "Sequence number": 29294658, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104326.184, "dur": 5.888, + "args": { + "External id": 2942132,"Sequence number": 29294658, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 130, "pid": 1336755, "tid": 1381158, "ts": 1555016104326.184, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104327.679, "dur": 4.256, + "args": { + "External id": 2942133,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104330.663, "dur": 1.123, + "args": { + "External id": 2942134,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016104335.476, "dur": 22.671, + "args": { + "External id": 2942135,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104367.201, "dur": 8.801, + "args": { + "External id": 2942136,"Record function id": 0, "Sequence number": 29294657, "Fwd thread id": 1, "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104368.043, "dur": 6.038, + "args": { + "External id": 2942137,"Sequence number": 29294657, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1208 + } + }, + { + "ph": "f", "id": 131, "pid": 1336755, "tid": 1381158, "ts": 1555016104368.043, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104368.984, "dur": 4.903, + "args": { + "External id": 2942138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104371.826, "dur": 1.598, + "args": { + "External id": 2942139,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104372.800, "dur": 0.493, + "args": { + "External id": 2942140,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104379.897, "dur": 5.280, + "args": { + "External id": 2942141,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104381.307, "dur": 3.380, + "args": { + "External id": 2942142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104382.338, "dur": 1.827, + "args": { + "External id": 2942143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104382.901, "dur": 1.185, + "args": { + "External id": 2942144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104388.600, "dur": 11.696, + "args": { + "External id": 2942145,"Record function id": 0, "Sequence number": 29294656, "Fwd thread id": 1, "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104389.964, "dur": 7.766, + "args": { + "External id": 2942146,"Sequence number": 29294656, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1217 + } + }, + { + "ph": "f", "id": 132, "pid": 1336755, "tid": 1381158, "ts": 1555016104389.964, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104391.052, "dur": 6.512, + "args": { + "External id": 2942147,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104396.550, "dur": 0.906, + "args": { + "External id": 2942148,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104405.962, "dur": 100.569, + "args": { + "External id": 2942149,"Record function id": 0, "Sequence number": 29294655, "Fwd thread id": 1, "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104406.671, "dur": 90.564, + "args": { + "External id": 2942150,"Sequence number": 29294655, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1221 + } + }, + { + "ph": "f", "id": 133, "pid": 1336755, "tid": 1381158, "ts": 1555016104406.671, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104409.849, "dur": 2.307, + "args": { + "External id": 2942151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104410.479, "dur": 1.265, + "args": { + "External id": 2942152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104411.197, "dur": 0.428, + "args": { + "External id": 2942153,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016104412.811, "dur": 29.695, + "args": { + "External id": 2942154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104443.559, "dur": 7.920, + "args": { + "External id": 2942155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104447.201, "dur": 3.760, + "args": { + "External id": 2942156,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104448.233, "dur": 2.564, + "args": { + "External id": 2942157,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104452.941, "dur": 4.929, + "args": { + "External id": 2942158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104453.866, "dur": 3.429, + "args": { + "External id": 2942159,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104456.651, "dur": 0.558, + "args": { + "External id": 2942160,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016104458.492, "dur": 37.958, + "args": { + "External id": 2942161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104511.210, "dur": 26.816, + "args": { + "External id": 2942162,"Record function id": 0, "Sequence number": 29294654, "Fwd thread id": 1, "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104512.132, "dur": 5.828, + "args": { + "External id": 2942163,"Sequence number": 29294654, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1234 + } + }, + { + "ph": "f", "id": 134, "pid": 1336755, "tid": 1381158, "ts": 1555016104512.132, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104515.897, "dur": 1.919, + "args": { + "External id": 2942164,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104516.516, "dur": 1.159, + "args": { + "External id": 2942165,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016104520.707, "dur": 15.325, + "args": { + "External id": 2942166,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104541.630, "dur": 6.688, + "args": { + "External id": 2942167,"Record function id": 0, "Sequence number": 29294653, "Fwd thread id": 1, "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104542.630, "dur": 3.325, + "args": { + "External id": 2942168,"Sequence number": 29294653, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1239 + } + }, + { + "ph": "f", "id": 135, "pid": 1336755, "tid": 1381158, "ts": 1555016104542.630, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104543.325, "dur": 2.427, + "args": { + "External id": 2942169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104543.887, "dur": 1.366, + "args": { + "External id": 2942170,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104544.765, "dur": 0.367, + "args": { + "External id": 2942171,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104554.314, "dur": 6.567, + "args": { + "External id": 2942172,"Record function id": 0, "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104555.709, "dur": 4.726, + "args": { + "External id": 2942173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104556.546, "dur": 3.415, + "args": { + "External id": 2942174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104558.845, "dur": 1.035, + "args": { + "External id": 2942175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016104566.963, "dur": 341.457, + "args": { + "External id": 2942176,"Record function id": 0, "Sequence number": 29294652, "Fwd thread id": 1, "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016104568.081, "dur": 309.655, + "args": { + "External id": 2942177,"Sequence number": 29294652, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1248 + } + }, + { + "ph": "f", "id": 136, "pid": 1336755, "tid": 1381158, "ts": 1555016104568.081, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104600.330, "dur": 1.933, + "args": { + "External id": 2942178,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104600.924, "dur": 1.181, + "args": { + "External id": 2942179,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016104615.482, "dur": 5.668, + "args": { + "External id": 2942180,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016104630.320, "dur": 1.765, + "args": { + "External id": 2942181,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104778.927, "dur": 1.503, + "args": { + "External id": 2942182,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016104784.740, "dur": 32.235, + "args": { + "External id": 2942183,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104792.543, "dur": 0.709, + "args": { + "External id": 2942184,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016104822.564, "dur": 31.808, + "args": { + "External id": 2942185,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016104824.435, "dur": 29.727, + "args": { + "External id": 2942186,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016104830.057, "dur": 4.966, + "args": { + "External id": 2942187,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016104836.511, "dur": 17.183, + "args": { + "External id": 2942188,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016104858.464, "dur": 4.599, + "args": { + "External id": 2942189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104861.912, "dur": 1.023, + "args": { + "External id": 2942190,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104869.420, "dur": 1.782, + "args": { + "External id": 2942191,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016104870.157, "dur": 0.944, + "args": { + "External id": 2942192,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016104887.975, "dur": 16.810, + "args": { + "External id": 2942193,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104916.580, "dur": 6.731, + "args": { + "External id": 2942194,"Record function id": 0, "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016104918.073, "dur": 4.458, + "args": { + "External id": 2942195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104919.458, "dur": 2.167, + "args": { + "External id": 2942196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016104920.215, "dur": 1.276, + "args": { + "External id": 2942197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104927.331, "dur": 8.975, + "args": { + "External id": 2942198,"Record function id": 0, "Sequence number": 29294651, "Fwd thread id": 1, "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016104930.950, "dur": 1.090, + "args": { + "External id": 2942199,"Sequence number": 29294651, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1270 + } + }, + { + "ph": "f", "id": 137, "pid": 1336755, "tid": 1381158, "ts": 1555016104930.950, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016104939.957, "dur": 482.718, + "args": { + "External id": 2942200,"Record function id": 0, "Sequence number": 29294650, "Fwd thread id": 1, "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016104943.010, "dur": 465.284, + "args": { + "External id": 2942201,"Sequence number": 29294650, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1272 + } + }, + { + "ph": "f", "id": 138, "pid": 1336755, "tid": 1381158, "ts": 1555016104943.010, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016104970.961, "dur": 7.336, + "args": { + "External id": 2942202,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016104975.287, "dur": 2.777, + "args": { + "External id": 2942203,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016104981.838, "dur": 49.953, + "args": { + "External id": 2942204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016104982.840, "dur": 47.966, + "args": { + "External id": 2942205,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105027.259, "dur": 3.049, + "args": { + "External id": 2942206,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1381158, + "ts": 1555016105035.742, "dur": 87.011, + "args": { + "External id": 2942207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105036.830, "dur": 5.141, + "args": { + "External id": 2942208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105037.800, "dur": 3.708, + "args": { + "External id": 2942209,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105040.888, "dur": 0.540, + "args": { + "External id": 2942210,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1381158, + "ts": 1555016105043.300, "dur": 78.654, + "args": { + "External id": 2942211,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016105044.571, "dur": 76.743, + "args": { + "External id": 2942212,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016105128.573, "dur": 3.119, + "args": { + "External id": 2942213,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105129.867, "dur": 1.679, + "args": { + "External id": 2942214,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016105179.834, "dur": 7.300, + "args": { + "External id": 2942215,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016105190.411, "dur": 2.045, + "args": { + "External id": 2942216,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016105193.205, "dur": 1.762, + "args": { + "External id": 2942217,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105231.451, "dur": 2.697, + "args": { + "External id": 2942218,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105232.280, "dur": 1.706, + "args": { + "External id": 2942219,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336755, "tid": 1381158, + "ts": 1555016105255.293, "dur": 132.673, + "args": { + "External id": 2942220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016105259.825, "dur": 10.737, + "args": { + "External id": 2942221,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105264.899, "dur": 4.715, + "args": { + "External id": 2942222,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016105272.001, "dur": 6.036, + "args": { + "External id": 2942223,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105276.717, "dur": 0.457, + "args": { + "External id": 2942224,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1381158, + "ts": 1555016105279.641, "dur": 1.531, + "args": { + "External id": 2942225,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105280.342, "dur": 0.502, + "args": { + "External id": 2942226,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016105284.505, "dur": 1.997, + "args": { + "External id": 2942227,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105285.368, "dur": 0.540, + "args": { + "External id": 2942228,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016105292.346, "dur": 4.189, + "args": { + "External id": 2942229,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105295.769, "dur": 0.460, + "args": { + "External id": 2942230,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105297.093, "dur": 7.718, + "args": { + "External id": 2942231,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016105302.393, "dur": 2.243, + "args": { + "External id": 2942232,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016105305.441, "dur": 4.028, + "args": { + "External id": 2942233,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105308.773, "dur": 0.411, + "args": { + "External id": 2942234,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105309.947, "dur": 3.533, + "args": { + "External id": 2942235,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105310.481, "dur": 2.917, + "args": { + "External id": 2942236,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016105314.729, "dur": 60.446, + "args": { + "External id": 2942237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105378.524, "dur": 1.140, + "args": { + "External id": 2942238,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336755, "tid": 1381158, + "ts": 1555016105380.142, "dur": 2.071, + "args": { + "External id": 2942239,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105381.219, "dur": 0.439, + "args": { + "External id": 2942240,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105386.200, "dur": 0.819, + "args": { + "External id": 2942241,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016105434.049, "dur": 8.957, + "args": { + "External id": 2942242,"Record function id": 0, "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016105435.894, "dur": 6.382, + "args": { + "External id": 2942243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016105437.832, "dur": 3.257, + "args": { + "External id": 2942244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016105438.931, "dur": 2.057, + "args": { + "External id": 2942245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105446.712, "dur": 7.069, + "args": { + "External id": 2942246,"Record function id": 0, "Sequence number": 29294649, "Fwd thread id": 1, "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105447.709, "dur": 3.058, + "args": { + "External id": 2942247,"Sequence number": 29294649, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 1318 + } + }, + { + "ph": "f", "id": 139, "pid": 1336755, "tid": 1381158, "ts": 1555016105447.709, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105449.200, "dur": 1.382, + "args": { + "External id": 2942248,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105449.732, "dur": 0.700, + "args": { + "External id": 2942249,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105457.369, "dur": 125.918, + "args": { + "External id": 2942250,"Record function id": 0, "Sequence number": 29294648, "Fwd thread id": 1, "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105458.260, "dur": 117.759, + "args": { + "External id": 2942251,"Sequence number": 29294648, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1322 + } + }, + { + "ph": "f", "id": 140, "pid": 1336755, "tid": 1381158, "ts": 1555016105458.260, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105465.350, "dur": 3.739, + "args": { + "External id": 2942252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105466.790, "dur": 1.806, + "args": { + "External id": 2942253,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105467.957, "dur": 0.472, + "args": { + "External id": 2942254,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016105470.126, "dur": 42.020, + "args": { + "External id": 2942255,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105513.384, "dur": 8.459, + "args": { + "External id": 2942256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105514.203, "dur": 6.881, + "args": { + "External id": 2942257,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105520.168, "dur": 0.772, + "args": { + "External id": 2942258,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105523.493, "dur": 5.157, + "args": { + "External id": 2942259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105525.039, "dur": 3.142, + "args": { + "External id": 2942260,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105525.683, "dur": 2.412, + "args": { + "External id": 2942261,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016105529.169, "dur": 46.138, + "args": { + "External id": 2942262,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105587.756, "dur": 8.522, + "args": { + "External id": 2942263,"Record function id": 0, "Sequence number": 29294647, "Fwd thread id": 1, "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105588.735, "dur": 5.905, + "args": { + "External id": 2942264,"Sequence number": 29294647, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1335 + } + }, + { + "ph": "f", "id": 141, "pid": 1336755, "tid": 1381158, "ts": 1555016105588.735, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105590.325, "dur": 4.156, + "args": { + "External id": 2942265,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105593.135, "dur": 1.214, + "args": { + "External id": 2942266,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105599.462, "dur": 9.280, + "args": { + "External id": 2942267,"Record function id": 0, "Sequence number": 29294646, "Fwd thread id": 1, "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105600.382, "dur": 5.956, + "args": { + "External id": 2942268,"Sequence number": 29294646, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1339 + } + }, + { + "ph": "f", "id": 142, "pid": 1336755, "tid": 1381158, "ts": 1555016105600.382, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105601.288, "dur": 4.816, + "args": { + "External id": 2942269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105601.875, "dur": 3.741, + "args": { + "External id": 2942270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105604.906, "dur": 0.537, + "args": { + "External id": 2942271,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016105612.450, "dur": 4.297, + "args": { + "External id": 2942272,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016105613.693, "dur": 2.580, + "args": { + "External id": 2942273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016105614.462, "dur": 1.595, + "args": { + "External id": 2942274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016105614.947, "dur": 1.005, + "args": { + "External id": 2942275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105619.850, "dur": 7.276, + "args": { + "External id": 2942276,"Record function id": 0, "Sequence number": 29294645, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105620.657, "dur": 4.103, + "args": { + "External id": 2942277,"Sequence number": 29294645, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 143, "pid": 1336755, "tid": 1381158, "ts": 1555016105620.657, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105623.441, "dur": 1.169, + "args": { + "External id": 2942278,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105623.835, "dur": 0.641, + "args": { + "External id": 2942279,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105630.396, "dur": 114.178, + "args": { + "External id": 2942280,"Record function id": 0, "Sequence number": 29294644, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105631.279, "dur": 104.202, + "args": { + "External id": 2942281,"Sequence number": 29294644, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 144, "pid": 1336755, "tid": 1381158, "ts": 1555016105631.279, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105633.947, "dur": 6.242, + "args": { + "External id": 2942282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105634.657, "dur": 5.134, + "args": { + "External id": 2942283,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105639.190, "dur": 0.492, + "args": { + "External id": 2942284,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016105640.962, "dur": 32.268, + "args": { + "External id": 2942285,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105674.427, "dur": 5.267, + "args": { + "External id": 2942286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105675.055, "dur": 3.977, + "args": { + "External id": 2942287,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105675.950, "dur": 2.928, + "args": { + "External id": 2942288,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105680.870, "dur": 5.543, + "args": { + "External id": 2942289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105682.514, "dur": 3.503, + "args": { + "External id": 2942290,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105685.543, "dur": 0.367, + "args": { + "External id": 2942291,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016105686.862, "dur": 47.995, + "args": { + "External id": 2942292,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105750.807, "dur": 32.139, + "args": { + "External id": 2942293,"Record function id": 0, "Sequence number": 29294643, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105751.796, "dur": 5.205, + "args": { + "External id": 2942294,"Sequence number": 29294643, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 145, "pid": 1336755, "tid": 1381158, "ts": 1555016105751.796, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105755.052, "dur": 1.807, + "args": { + "External id": 2942295,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105755.648, "dur": 1.099, + "args": { + "External id": 2942296,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016105759.414, "dur": 20.420, + "args": { + "External id": 2942297,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105786.527, "dur": 9.863, + "args": { + "External id": 2942298,"Record function id": 0, "Sequence number": 29294642, "Fwd thread id": 1, "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016105787.474, "dur": 6.248, + "args": { + "External id": 2942299,"Sequence number": 29294642, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1370 + } + }, + { + "ph": "f", "id": 146, "pid": 1336755, "tid": 1381158, "ts": 1555016105787.474, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016105788.340, "dur": 5.149, + "args": { + "External id": 2942300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016105791.079, "dur": 1.954, + "args": { + "External id": 2942301,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016105792.140, "dur": 0.752, + "args": { + "External id": 2942302,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016105800.242, "dur": 6.844, + "args": { + "External id": 2942303,"Record function id": 0, "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016105803.689, "dur": 2.904, + "args": { + "External id": 2942304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016105804.587, "dur": 1.481, + "args": { + "External id": 2942305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016105805.049, "dur": 0.934, + "args": { + "External id": 2942306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016105810.870, "dur": 497.662, + "args": { + "External id": 2942307,"Record function id": 0, "Sequence number": 29294641, "Fwd thread id": 1, "Ev Idx": 1378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016105815.642, "dur": 453.415, + "args": { + "External id": 2942308,"Sequence number": 29294641, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 1379 + } + }, + { + "ph": "f", "id": 147, "pid": 1336755, "tid": 1381158, "ts": 1555016105815.642, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1381158, + "ts": 1555016105835.791, "dur": 32.965, + "args": { + "External id": 2942309,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016105837.066, "dur": 31.463, + "args": { + "External id": 2942310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016105839.660, "dur": 5.181, + "args": { + "External id": 2942311,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016105841.474, "dur": 2.768, + "args": { + "External id": 2942312,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016105846.429, "dur": 21.521, + "args": { + "External id": 2942313,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105882.060, "dur": 6.066, + "args": { + "External id": 2942314,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105884.632, "dur": 3.390, + "args": { + "External id": 2942315,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016105891.645, "dur": 1.261, + "args": { + "External id": 2942316,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016105892.066, "dur": 0.749, + "args": { + "External id": 2942317,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016105905.916, "dur": 2.171, + "args": { + "External id": 2942318,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016105918.273, "dur": 3.499, + "args": { + "External id": 2942319,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106130.307, "dur": 5.184, + "args": { + "External id": 2942320,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016106140.544, "dur": 53.112, + "args": { + "External id": 2942321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106167.629, "dur": 0.891, + "args": { + "External id": 2942322,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016106199.897, "dur": 30.146, + "args": { + "External id": 2942323,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016106201.728, "dur": 28.058, + "args": { + "External id": 2942324,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106205.670, "dur": 4.098, + "args": { + "External id": 2942325,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016106212.934, "dur": 16.417, + "args": { + "External id": 2942326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016106234.107, "dur": 5.173, + "args": { + "External id": 2942327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106237.607, "dur": 1.565, + "args": { + "External id": 2942328,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016106246.803, "dur": 4.758, + "args": { + "External id": 2942329,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106247.430, "dur": 3.983, + "args": { + "External id": 2942330,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016106253.386, "dur": 3.493, + "args": { + "External id": 2942331,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106255.861, "dur": 0.926, + "args": { + "External id": 2942332,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016106288.145, "dur": 18.771, + "args": { + "External id": 2942333,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016106321.994, "dur": 9.253, + "args": { + "External id": 2942334,"Record function id": 0, "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016106323.927, "dur": 6.498, + "args": { + "External id": 2942335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016106326.244, "dur": 3.241, + "args": { + "External id": 2942336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016106327.374, "dur": 2.000, + "args": { + "External id": 2942337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106334.955, "dur": 9.806, + "args": { + "External id": 2942338,"Record function id": 0, "Sequence number": 29294640, "Fwd thread id": 1, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106335.895, "dur": 6.027, + "args": { + "External id": 2942339,"Sequence number": 29294640, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1410 + } + }, + { + "ph": "f", "id": 148, "pid": 1336755, "tid": 1381158, "ts": 1555016106335.895, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016106337.309, "dur": 4.392, + "args": { + "External id": 2942340,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106340.582, "dur": 0.971, + "args": { + "External id": 2942341,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106348.480, "dur": 132.708, + "args": { + "External id": 2942342,"Record function id": 0, "Sequence number": 29294639, "Fwd thread id": 1, "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106349.352, "dur": 124.872, + "args": { + "External id": 2942343,"Sequence number": 29294639, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1414 + } + }, + { + "ph": "f", "id": 149, "pid": 1336755, "tid": 1381158, "ts": 1555016106349.352, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016106354.301, "dur": 4.285, + "args": { + "External id": 2942344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016106355.729, "dur": 2.274, + "args": { + "External id": 2942345,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106356.993, "dur": 0.787, + "args": { + "External id": 2942346,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016106359.965, "dur": 56.494, + "args": { + "External id": 2942347,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016106419.569, "dur": 7.231, + "args": { + "External id": 2942348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016106420.372, "dur": 5.850, + "args": { + "External id": 2942349,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106423.620, "dur": 2.451, + "args": { + "External id": 2942350,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016106428.362, "dur": 2.835, + "args": { + "External id": 2942351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016106429.542, "dur": 1.255, + "args": { + "External id": 2942352,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106430.344, "dur": 0.382, + "args": { + "External id": 2942353,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016106433.787, "dur": 39.761, + "args": { + "External id": 2942354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106485.682, "dur": 11.327, + "args": { + "External id": 2942355,"Record function id": 0, "Sequence number": 29294638, "Fwd thread id": 1, "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106486.706, "dur": 7.549, + "args": { + "External id": 2942356,"Sequence number": 29294638, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1427 + } + }, + { + "ph": "f", "id": 150, "pid": 1336755, "tid": 1381158, "ts": 1555016106486.706, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016106488.254, "dur": 5.848, + "args": { + "External id": 2942357,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106491.095, "dur": 2.912, + "args": { + "External id": 2942358,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106500.412, "dur": 9.033, + "args": { + "External id": 2942359,"Record function id": 0, "Sequence number": 29294637, "Fwd thread id": 1, "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106501.466, "dur": 5.675, + "args": { + "External id": 2942360,"Sequence number": 29294637, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1431 + } + }, + { + "ph": "f", "id": 151, "pid": 1336755, "tid": 1381158, "ts": 1555016106501.466, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016106502.245, "dur": 4.657, + "args": { + "External id": 2942361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016106503.019, "dur": 3.393, + "args": { + "External id": 2942362,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106505.895, "dur": 0.436, + "args": { + "External id": 2942363,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016106513.190, "dur": 4.732, + "args": { + "External id": 2942364,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016106514.401, "dur": 3.070, + "args": { + "External id": 2942365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016106515.379, "dur": 1.776, + "args": { + "External id": 2942366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016106515.872, "dur": 1.160, + "args": { + "External id": 2942367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106521.362, "dur": 6.550, + "args": { + "External id": 2942368,"Record function id": 0, "Sequence number": 29294636, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016106522.324, "dur": 3.520, + "args": { + "External id": 2942369,"Sequence number": 29294636, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 152, "pid": 1336755, "tid": 1381158, "ts": 1555016106522.324, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016106523.805, "dur": 1.880, + "args": { + "External id": 2942370,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016106524.403, "dur": 1.149, + "args": { + "External id": 2942371,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016106532.258, "dur": 291.136, + "args": { + "External id": 2942372,"Record function id": 0, "Sequence number": 29294635, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016106535.129, "dur": 265.526, + "args": { + "External id": 2942373,"Sequence number": 29294635, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 153, "pid": 1336755, "tid": 1381158, "ts": 1555016106535.129, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016106551.950, "dur": 7.931, + "args": { + "External id": 2942374,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106556.042, "dur": 3.368, + "args": { + "External id": 2942375,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016106561.892, "dur": 4.945, + "args": { + "External id": 2942376,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106564.679, "dur": 1.962, + "args": { + "External id": 2942377,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016106568.245, "dur": 2.673, + "args": { + "External id": 2942378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106568.864, "dur": 1.866, + "args": { + "External id": 2942379,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016106594.102, "dur": 181.170, + "args": { + "External id": 2942380,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016106675.380, "dur": 5.363, + "args": { + "External id": 2942381,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016106684.814, "dur": 4.346, + "args": { + "External id": 2942382,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016106788.090, "dur": 3.284, + "args": { + "External id": 2942383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016106794.454, "dur": 0.705, + "args": { + "External id": 2942384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1381158, + "ts": 1555016106797.294, "dur": 0.773, + "args": { + "External id": 2942385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016106829.573, "dur": 274.716, + "args": { + "External id": 2942386,"Record function id": 0, "Sequence number": 29294634, "Fwd thread id": 1, "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016106830.857, "dur": 263.562, + "args": { + "External id": 2942387,"Sequence number": 29294634, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1458 + } + }, + { + "ph": "f", "id": 154, "pid": 1336755, "tid": 1381158, "ts": 1555016106830.857, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016106849.297, "dur": 54.046, + "args": { + "External id": 2942388,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106853.931, "dur": 2.932, + "args": { + "External id": 2942389,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016106858.228, "dur": 44.555, + "args": { + "External id": 2942390,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016106912.253, "dur": 4.218, + "args": { + "External id": 2942391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016106913.717, "dur": 2.432, + "args": { + "External id": 2942392,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016107113.937, "dur": 195.197, + "args": { + "External id": 2942393,"Record function id": 0, "Sequence number": 29294633, "Fwd thread id": 1, "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016107117.964, "dur": 182.180, + "args": { + "External id": 2942394,"Sequence number": 29294633, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1465 + } + }, + { + "ph": "f", "id": 155, "pid": 1336755, "tid": 1381158, "ts": 1555016107117.964, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1381158, + "ts": 1555016107132.497, "dur": 54.836, + "args": { + "External id": 2942395,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107134.816, "dur": 3.184, + "args": { + "External id": 2942396,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016107138.934, "dur": 47.616, + "args": { + "External id": 2942397,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1381158, + "ts": 1555016107196.324, "dur": 5.284, + "args": { + "External id": 2942398,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107197.897, "dur": 3.311, + "args": { + "External id": 2942399,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107317.366, "dur": 17.701, + "args": { + "External id": 2942400,"Record function id": 0, "Sequence number": 29294632, "Fwd thread id": 1, "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107319.032, "dur": 13.010, + "args": { + "External id": 2942401,"Sequence number": 29294632, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1472 + } + }, + { + "ph": "f", "id": 156, "pid": 1336755, "tid": 1381158, "ts": 1555016107319.032, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107323.227, "dur": 8.532, + "args": { + "External id": 2942402,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107324.427, "dur": 7.185, + "args": { + "External id": 2942403,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107338.841, "dur": 7.566, + "args": { + "External id": 2942404,"Record function id": 0, "Sequence number": 29294631, "Fwd thread id": 1, "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107339.779, "dur": 3.540, + "args": { + "External id": 2942405,"Sequence number": 29294631, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1476 + } + }, + { + "ph": "f", "id": 157, "pid": 1336755, "tid": 1381158, "ts": 1555016107339.779, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107341.258, "dur": 1.877, + "args": { + "External id": 2942406,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107342.027, "dur": 0.933, + "args": { + "External id": 2942407,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107349.618, "dur": 10.510, + "args": { + "External id": 2942408,"Record function id": 0, "Sequence number": 29294630, "Fwd thread id": 1, "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107350.258, "dur": 7.076, + "args": { + "External id": 2942409,"Sequence number": 29294630, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1480 + } + }, + { + "ph": "f", "id": 158, "pid": 1336755, "tid": 1381158, "ts": 1555016107350.258, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107355.877, "dur": 1.311, + "args": { + "External id": 2942410,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107356.324, "dur": 0.724, + "args": { + "External id": 2942411,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107363.674, "dur": 6.463, + "args": { + "External id": 2942412,"Record function id": 0, "Sequence number": 29294629, "Fwd thread id": 1, "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107364.562, "dur": 3.177, + "args": { + "External id": 2942413,"Sequence number": 29294629, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1484 + } + }, + { + "ph": "f", "id": 159, "pid": 1336755, "tid": 1381158, "ts": 1555016107364.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107365.615, "dur": 1.960, + "args": { + "External id": 2942414,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107366.257, "dur": 1.182, + "args": { + "External id": 2942415,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107373.293, "dur": 154.196, + "args": { + "External id": 2942416,"Record function id": 0, "Sequence number": 29294628, "Fwd thread id": 1, "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107374.077, "dur": 145.954, + "args": { + "External id": 2942417,"Sequence number": 29294628, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1488 + } + }, + { + "ph": "f", "id": 160, "pid": 1336755, "tid": 1381158, "ts": 1555016107374.077, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107378.133, "dur": 10.633, + "args": { + "External id": 2942418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107382.005, "dur": 6.105, + "args": { + "External id": 2942419,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107386.118, "dur": 1.667, + "args": { + "External id": 2942420,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016107390.519, "dur": 67.605, + "args": { + "External id": 2942421,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107459.345, "dur": 4.629, + "args": { + "External id": 2942422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107460.075, "dur": 3.251, + "args": { + "External id": 2942423,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107461.430, "dur": 1.737, + "args": { + "External id": 2942424,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107465.714, "dur": 5.330, + "args": { + "External id": 2942425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107469.313, "dur": 1.268, + "args": { + "External id": 2942426,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107469.985, "dur": 0.496, + "args": { + "External id": 2942427,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016107471.565, "dur": 47.551, + "args": { + "External id": 2942428,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107532.287, "dur": 8.839, + "args": { + "External id": 2942429,"Record function id": 0, "Sequence number": 29294627, "Fwd thread id": 1, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107533.624, "dur": 5.440, + "args": { + "External id": 2942430,"Sequence number": 29294627, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1501 + } + }, + { + "ph": "f", "id": 161, "pid": 1336755, "tid": 1381158, "ts": 1555016107533.624, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107536.977, "dur": 1.949, + "args": { + "External id": 2942431,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107537.648, "dur": 1.139, + "args": { + "External id": 2942432,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107544.438, "dur": 9.296, + "args": { + "External id": 2942433,"Record function id": 0, "Sequence number": 29294626, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107545.320, "dur": 6.825, + "args": { + "External id": 2942434,"Sequence number": 29294626, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 162, "pid": 1336755, "tid": 1381158, "ts": 1555016107545.320, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107549.177, "dur": 2.731, + "args": { + "External id": 2942435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107549.811, "dur": 1.633, + "args": { + "External id": 2942436,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107550.778, "dur": 0.571, + "args": { + "External id": 2942437,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016107559.430, "dur": 8.857, + "args": { + "External id": 2942438,"Record function id": 0, "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016107560.922, "dur": 6.505, + "args": { + "External id": 2942439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016107563.266, "dur": 3.706, + "args": { + "External id": 2942440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016107564.353, "dur": 2.461, + "args": { + "External id": 2942441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107572.046, "dur": 10.476, + "args": { + "External id": 2942442,"Record function id": 0, "Sequence number": 29294625, "Fwd thread id": 1, "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107575.210, "dur": 4.776, + "args": { + "External id": 2942443,"Sequence number": 29294625, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1514 + } + }, + { + "ph": "f", "id": 163, "pid": 1336755, "tid": 1381158, "ts": 1555016107575.210, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107576.346, "dur": 3.491, + "args": { + "External id": 2942444,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107578.988, "dur": 0.710, + "args": { + "External id": 2942445,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107585.702, "dur": 95.268, + "args": { + "External id": 2942446,"Record function id": 0, "Sequence number": 29294624, "Fwd thread id": 1, "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107586.687, "dur": 87.091, + "args": { + "External id": 2942447,"Sequence number": 29294624, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1518 + } + }, + { + "ph": "f", "id": 164, "pid": 1336755, "tid": 1381158, "ts": 1555016107586.687, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107589.838, "dur": 2.175, + "args": { + "External id": 2942448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107590.384, "dur": 1.213, + "args": { + "External id": 2942449,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107591.071, "dur": 0.402, + "args": { + "External id": 2942450,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016107592.785, "dur": 27.616, + "args": { + "External id": 2942451,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107623.660, "dur": 3.279, + "args": { + "External id": 2942452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107624.287, "dur": 2.165, + "args": { + "External id": 2942453,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107625.024, "dur": 1.300, + "args": { + "External id": 2942454,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107628.337, "dur": 7.948, + "args": { + "External id": 2942455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107629.430, "dur": 6.458, + "args": { + "External id": 2942456,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107632.964, "dur": 2.807, + "args": { + "External id": 2942457,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016107638.944, "dur": 33.908, + "args": { + "External id": 2942458,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107687.935, "dur": 35.469, + "args": { + "External id": 2942459,"Record function id": 0, "Sequence number": 29294623, "Fwd thread id": 1, "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107688.990, "dur": 7.460, + "args": { + "External id": 2942460,"Sequence number": 29294623, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1531 + } + }, + { + "ph": "f", "id": 165, "pid": 1336755, "tid": 1381158, "ts": 1555016107688.990, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107690.284, "dur": 6.021, + "args": { + "External id": 2942461,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107695.167, "dur": 0.990, + "args": { + "External id": 2942462,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1381158, + "ts": 1555016107699.392, "dur": 21.690, + "args": { + "External id": 2942463,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107727.199, "dur": 33.889, + "args": { + "External id": 2942464,"Record function id": 0, "Sequence number": 29294622, "Fwd thread id": 1, "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107751.203, "dur": 7.548, + "args": { + "External id": 2942465,"Sequence number": 29294622, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1536 + } + }, + { + "ph": "f", "id": 166, "pid": 1336755, "tid": 1381158, "ts": 1555016107751.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107753.780, "dur": 4.730, + "args": { + "External id": 2942466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107754.522, "dur": 3.490, + "args": { + "External id": 2942467,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107757.360, "dur": 0.544, + "args": { + "External id": 2942468,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016107765.341, "dur": 5.006, + "args": { + "External id": 2942469,"Record function id": 0, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016107766.363, "dur": 3.386, + "args": { + "External id": 2942470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016107767.442, "dur": 1.882, + "args": { + "External id": 2942471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016107767.884, "dur": 1.356, + "args": { + "External id": 2942472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107773.731, "dur": 7.625, + "args": { + "External id": 2942473,"Record function id": 0, "Sequence number": 29294621, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107774.657, "dur": 5.026, + "args": { + "External id": 2942474,"Sequence number": 29294621, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 167, "pid": 1336755, "tid": 1381158, "ts": 1555016107774.657, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107775.603, "dur": 3.926, + "args": { + "External id": 2942475,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107778.627, "dur": 0.760, + "args": { + "External id": 2942476,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107784.534, "dur": 93.982, + "args": { + "External id": 2942477,"Record function id": 0, "Sequence number": 29294620, "Fwd thread id": 1, "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107785.380, "dur": 84.949, + "args": { + "External id": 2942478,"Sequence number": 29294620, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1549 + } + }, + { + "ph": "f", "id": 168, "pid": 1336755, "tid": 1381158, "ts": 1555016107785.380, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107788.734, "dur": 3.937, + "args": { + "External id": 2942479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107789.168, "dur": 3.075, + "args": { + "External id": 2942480,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107791.707, "dur": 0.413, + "args": { + "External id": 2942481,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016107793.434, "dur": 30.184, + "args": { + "External id": 2942482,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107826.673, "dur": 2.367, + "args": { + "External id": 2942483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107827.137, "dur": 1.386, + "args": { + "External id": 2942484,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107827.824, "dur": 0.585, + "args": { + "External id": 2942485,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107830.303, "dur": 2.377, + "args": { + "External id": 2942486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107831.282, "dur": 1.039, + "args": { + "External id": 2942487,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107831.769, "dur": 0.483, + "args": { + "External id": 2942488,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016107835.199, "dur": 34.412, + "args": { + "External id": 2942489,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107882.622, "dur": 28.265, + "args": { + "External id": 2942490,"Record function id": 0, "Sequence number": 29294619, "Fwd thread id": 1, "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107883.626, "dur": 5.347, + "args": { + "External id": 2942491,"Sequence number": 29294619, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1562 + } + }, + { + "ph": "f", "id": 169, "pid": 1336755, "tid": 1381158, "ts": 1555016107883.626, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107887.012, "dur": 1.812, + "args": { + "External id": 2942492,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107887.651, "dur": 1.035, + "args": { + "External id": 2942493,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016107891.700, "dur": 16.878, + "args": { + "External id": 2942494,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107914.548, "dur": 9.040, + "args": { + "External id": 2942495,"Record function id": 0, "Sequence number": 29294618, "Fwd thread id": 1, "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336755, "tid": 1381158, + "ts": 1555016107915.348, "dur": 5.560, + "args": { + "External id": 2942496,"Sequence number": 29294618, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1567 + } + }, + { + "ph": "f", "id": 170, "pid": 1336755, "tid": 1381158, "ts": 1555016107915.348, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1381158, + "ts": 1555016107915.928, "dur": 4.753, + "args": { + "External id": 2942497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1381158, + "ts": 1555016107916.508, "dur": 3.611, + "args": { + "External id": 2942498,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016107919.541, "dur": 0.445, + "args": { + "External id": 2942499,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016107927.438, "dur": 5.155, + "args": { + "External id": 2942500,"Record function id": 0, "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016107928.602, "dur": 3.454, + "args": { + "External id": 2942501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016107929.694, "dur": 1.997, + "args": { + "External id": 2942502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016107930.148, "dur": 1.426, + "args": { + "External id": 2942503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016107936.506, "dur": 429.675, + "args": { + "External id": 2942504,"Record function id": 0, "Sequence number": 29294617, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016107939.720, "dur": 389.841, + "args": { + "External id": 2942505,"Sequence number": 29294617, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 171, "pid": 1336755, "tid": 1381158, "ts": 1555016107939.720, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016107967.850, "dur": 1.581, + "args": { + "External id": 2942506,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016107968.359, "dur": 0.906, + "args": { + "External id": 2942507,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016108026.649, "dur": 7.794, + "args": { + "External id": 2942508,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016108046.674, "dur": 1.991, + "args": { + "External id": 2942509,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016108217.641, "dur": 2.833, + "args": { + "External id": 2942510,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1381158, + "ts": 1555016108224.857, "dur": 40.888, + "args": { + "External id": 2942511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108238.873, "dur": 0.937, + "args": { + "External id": 2942512,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016108271.570, "dur": 31.895, + "args": { + "External id": 2942513,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016108273.687, "dur": 29.552, + "args": { + "External id": 2942514,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108277.674, "dur": 4.683, + "args": { + "External id": 2942515,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016108285.793, "dur": 16.925, + "args": { + "External id": 2942516,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1381158, + "ts": 1555016108307.655, "dur": 4.935, + "args": { + "External id": 2942517,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016108311.505, "dur": 0.942, + "args": { + "External id": 2942518,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1381158, + "ts": 1555016108320.077, "dur": 2.304, + "args": { + "External id": 2942519,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016108321.069, "dur": 1.212, + "args": { + "External id": 2942520,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1381158, + "ts": 1555016108345.074, "dur": 16.359, + "args": { + "External id": 2942521,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016108378.247, "dur": 11.293, + "args": { + "External id": 2942522,"Record function id": 0, "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016108380.207, "dur": 8.510, + "args": { + "External id": 2942523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016108382.307, "dur": 5.358, + "args": { + "External id": 2942524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016108385.613, "dur": 1.914, + "args": { + "External id": 2942525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016108394.182, "dur": 2873.578, + "args": { + "External id": 2942526,"Record function id": 0, "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.27)", "pid": 1336755, "tid": 1381158, + "ts": 1555016108425.886, "dur": 998.333, + "args": { + "External id": 2942527,"Record function id": 0, "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.26", "pid": 1336755, "tid": 1381158, + "ts": 1555016108455.200, "dur": 960.211, + "args": { + "External id": 2942528,"Record function id": 0, "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 1336755, "tid": 1381158, + "ts": 1555016108471.643, "dur": 928.290, + "args": { + "External id": 2942529,"Record function id": 0, "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016108571.200, "dur": 6.632, + "args": { + "External id": 2942530,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016108595.558, "dur": 41.348, + "args": { + "External id": 2942531,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108602.356, "dur": 3.302, + "args": { + "External id": 2942532,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108606.898, "dur": 0.621, + "args": { + "External id": 2942533,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108612.825, "dur": 0.425, + "args": { + "External id": 2942534,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108613.777, "dur": 0.293, + "args": { + "External id": 2942535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108614.800, "dur": 0.333, + "args": { + "External id": 2942536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108619.585, "dur": 0.313, + "args": { + "External id": 2942537,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108620.992, "dur": 0.377, + "args": { + "External id": 2942538,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108621.813, "dur": 2.828, + "args": { + "External id": 2942539,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108627.197, "dur": 2.658, + "args": { + "External id": 2942540,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016108653.730, "dur": 36.357, + "args": { + "External id": 2942541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016108735.950, "dur": 120.245, + "args": { + "External id": 2942542,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016108750.215, "dur": 6.059, + "args": { + "External id": 2942543,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016108763.534, "dur": 9.530, + "args": { + "External id": 2942544,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016108767.737, "dur": 4.934, + "args": { + "External id": 2942545,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108770.764, "dur": 0.629, + "args": { + "External id": 2942546,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016108780.097, "dur": 33.421, + "args": { + "External id": 2942547,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108782.573, "dur": 0.691, + "args": { + "External id": 2942548,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108784.229, "dur": 2.431, + "args": { + "External id": 2942549,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108787.246, "dur": 0.352, + "args": { + "External id": 2942550,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108790.426, "dur": 0.501, + "args": { + "External id": 2942551,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108793.943, "dur": 0.353, + "args": { + "External id": 2942552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108795.186, "dur": 2.578, + "args": { + "External id": 2942553,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108800.929, "dur": 0.485, + "args": { + "External id": 2942554,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108804.010, "dur": 0.352, + "args": { + "External id": 2942555,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016108807.123, "dur": 0.331, + "args": { + "External id": 2942556,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016108823.740, "dur": 24.170, + "args": { + "External id": 2942557,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016108925.046, "dur": 361.350, + "args": { + "External id": 2942558,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016108962.162, "dur": 318.104, + "args": { + "External id": 2942559,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1630, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016108975.257, "dur": 298.287, + "args": { + "External id": 2942560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016109314.338, "dur": 2.467, + "args": { + "External id": 2942561,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1632, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016109432.840, "dur": 1812.589, + "args": { + "External id": 2942562,"Sequence number": 29294616, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1633 + } + }, + { + "ph": "f", "id": 172, "pid": 1336755, "tid": 1381158, "ts": 1555016109432.840, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016109575.111, "dur": 104.745, + "args": { + "External id": 2942563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016109727.731, "dur": 46.483, + "args": { + "External id": 2942564,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016109793.049, "dur": 50.232, + "args": { + "External id": 2942565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016109853.131, "dur": 31.694, + "args": { + "External id": 2942566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016109891.151, "dur": 44.703, + "args": { + "External id": 2942567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016109944.557, "dur": 27.249, + "args": { + "External id": 2942568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016109979.424, "dur": 86.672, + "args": { + "External id": 2942569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016110100.912, "dur": 30.958, + "args": { + "External id": 2942570,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016110171.131, "dur": 41.787, + "args": { + "External id": 2942571,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016110241.521, "dur": 22.970, + "args": { + "External id": 2942572,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016110281.724, "dur": 21.483, + "args": { + "External id": 2942573,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110313.012, "dur": 34.746, + "args": { + "External id": 2942574,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110351.147, "dur": 32.935, + "args": { + "External id": 2942575,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016110415.738, "dur": 179.905, + "args": { + "External id": 2942576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016110501.286, "dur": 7.392, + "args": { + "External id": 2942577,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016110510.467, "dur": 2.090, + "args": { + "External id": 2942578,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016110636.119, "dur": 27.937, + "args": { + "External id": 2942579,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016110682.747, "dur": 17.613, + "args": { + "External id": 2942580,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110708.216, "dur": 34.960, + "args": { + "External id": 2942581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110749.135, "dur": 34.066, + "args": { + "External id": 2942582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110790.270, "dur": 21.598, + "args": { + "External id": 2942583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110818.459, "dur": 29.864, + "args": { + "External id": 2942584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110853.977, "dur": 35.401, + "args": { + "External id": 2942585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016110902.099, "dur": 36.521, + "args": { + "External id": 2942586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016110964.115, "dur": 63.714, + "args": { + "External id": 2942587,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016111052.029, "dur": 28.668, + "args": { + "External id": 2942588,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016111103.765, "dur": 18.925, + "args": { + "External id": 2942589,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016111137.417, "dur": 29.576, + "args": { + "External id": 2942590,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016111192.650, "dur": 20.430, + "args": { + "External id": 2942591,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111290.417, "dur": 13.903, + "args": { + "External id": 2942592,"Record function id": 0, "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111293.281, "dur": 10.169, + "args": { + "External id": 2942593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111297.599, "dur": 5.002, + "args": { + "External id": 2942594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111298.900, "dur": 3.606, + "args": { + "External id": 2942595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111308.494, "dur": 7.808, + "args": { + "External id": 2942596,"Record function id": 0, "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111311.942, "dur": 3.866, + "args": { + "External id": 2942597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111313.521, "dur": 1.714, + "args": { + "External id": 2942598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111314.246, "dur": 0.912, + "args": { + "External id": 2942599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111319.625, "dur": 4.755, + "args": { + "External id": 2942600,"Record function id": 0, "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111320.920, "dur": 3.022, + "args": { + "External id": 2942601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111321.800, "dur": 1.640, + "args": { + "External id": 2942602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111322.724, "dur": 0.609, + "args": { + "External id": 2942603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111327.557, "dur": 7.070, + "args": { + "External id": 2942604,"Record function id": 0, "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111328.855, "dur": 5.343, + "args": { + "External id": 2942605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111329.792, "dur": 3.954, + "args": { + "External id": 2942606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111330.542, "dur": 3.079, + "args": { + "External id": 2942607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111337.731, "dur": 4.664, + "args": { + "External id": 2942608,"Record function id": 0, "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111339.232, "dur": 2.692, + "args": { + "External id": 2942609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111340.155, "dur": 1.151, + "args": { + "External id": 2942610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111340.426, "dur": 0.786, + "args": { + "External id": 2942611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111345.426, "dur": 4.076, + "args": { + "External id": 2942612,"Record function id": 0, "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111346.645, "dur": 2.447, + "args": { + "External id": 2942613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111347.412, "dur": 1.278, + "args": { + "External id": 2942614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111348.004, "dur": 0.589, + "args": { + "External id": 2942615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111352.634, "dur": 3.930, + "args": { + "External id": 2942616,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111353.701, "dur": 2.452, + "args": { + "External id": 2942617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111354.408, "dur": 1.071, + "args": { + "External id": 2942618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111354.831, "dur": 0.579, + "args": { + "External id": 2942619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111359.813, "dur": 6.122, + "args": { + "External id": 2942620,"Record function id": 0, "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111361.144, "dur": 4.399, + "args": { + "External id": 2942621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111361.955, "dur": 3.194, + "args": { + "External id": 2942622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111364.313, "dur": 0.724, + "args": { + "External id": 2942623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111368.977, "dur": 3.651, + "args": { + "External id": 2942624,"Record function id": 0, "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016111369.977, "dur": 2.227, + "args": { + "External id": 2942625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111370.580, "dur": 1.153, + "args": { + "External id": 2942626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016111370.864, "dur": 0.773, + "args": { + "External id": 2942627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016111378.518, "dur": 167921.529, + "args": { + "External id": 2942628,"Record function id": 0, "Sequence number": 29294615, "Fwd thread id": 1, "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016111380.223, "dur": 167910.683, + "args": { + "External id": 2942629,"Sequence number": 29294615, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1700 + } + }, + { + "ph": "f", "id": 173, "pid": 1336755, "tid": 1381158, "ts": 1555016111380.223, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.27)", "pid": 1336755, "tid": 1381158, + "ts": 1555016111413.036, "dur": 44.427, + "args": { + "External id": 2942630,"Record function id": 0, "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.27)", "pid": 1336755, "tid": 1381158, + "ts": 1555016111465.157, "dur": 99.908, + "args": { + "External id": 2942631,"Record function id": 0, "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.27)", "pid": 1336755, "tid": 1381158, + "ts": 1555016111572.198, "dur": 167710.063, + "args": { + "External id": 2942632,"Record function id": 0, "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016111633.318, "dur": 6.683, + "args": { + "External id": 2942633,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016111650.928, "dur": 4.393, + "args": { + "External id": 2942634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016111676.237, "dur": 166720.147, + "args": { + "External id": 2942635,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016111692.851, "dur": 166692.127, + "args": { + "External id": 2942636,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016111796.944, "dur": 6.003, + "args": { + "External id": 2942637,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016111818.938, "dur": 166519.597, + "args": { + "External id": 2942638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016111821.510, "dur": 166516.032, + "args": { + "External id": 2942639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016111824.659, "dur": 7.241, + "args": { + "External id": 2942640,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016111833.430, "dur": 166499.987, + "args": { + "External id": 2942641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016278500.728, "dur": 10.010, + "args": { + "External id": 2942642,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016278503.897, "dur": 6.404, + "args": { + "External id": 2942643,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016278551.360, "dur": 349.153, + "args": { + "External id": 2942644,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016278584.705, "dur": 311.347, + "args": { + "External id": 2942645,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1716, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016278598.409, "dur": 292.544, + "args": { + "External id": 2942646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016278924.579, "dur": 2.085, + "args": { + "External id": 2942647,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1718, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279018.923, "dur": 6.892, + "args": { + "External id": 2942648,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279084.896, "dur": 1.285, + "args": { + "External id": 2942649,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279103.531, "dur": 1.439, + "args": { + "External id": 2942650,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279120.929, "dur": 1.097, + "args": { + "External id": 2942651,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279134.019, "dur": 0.849, + "args": { + "External id": 2942652,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279161.664, "dur": 1.720, + "args": { + "External id": 2942653,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279177.394, "dur": 1.363, + "args": { + "External id": 2942654,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279193.389, "dur": 2.999, + "args": { + "External id": 2942655,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279208.602, "dur": 1.050, + "args": { + "External id": 2942656,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016279316.113, "dur": 2919.116, + "args": { + "External id": 2942657,"Record function id": 0, "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.26)", "pid": 1336755, "tid": 1381158, + "ts": 1555016279337.184, "dur": 1126.745, + "args": { + "External id": 2942658,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 1336755, "tid": 1381158, + "ts": 1555016279351.626, "dur": 350.548, + "args": { + "External id": 2942659,"Record function id": 0, "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279445.010, "dur": 4.144, + "args": { + "External id": 2942660,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279452.169, "dur": 1.052, + "args": { + "External id": 2942661,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279455.049, "dur": 0.913, + "args": { + "External id": 2942662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279457.654, "dur": 3.101, + "args": { + "External id": 2942663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279464.770, "dur": 1.051, + "args": { + "External id": 2942664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279467.338, "dur": 1.027, + "args": { + "External id": 2942665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279470.025, "dur": 3.161, + "args": { + "External id": 2942666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279474.621, "dur": 0.860, + "args": { + "External id": 2942667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279481.112, "dur": 0.952, + "args": { + "External id": 2942668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016279483.567, "dur": 0.843, + "args": { + "External id": 2942669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016279507.671, "dur": 165.479, + "args": { + "External id": 2942670,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016279528.907, "dur": 140.074, + "args": { + "External id": 2942671,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016279550.750, "dur": 12.612, + "args": { + "External id": 2942672,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016279567.100, "dur": 72.394, + "args": { + "External id": 2942673,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016279569.968, "dur": 69.167, + "args": { + "External id": 2942674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279573.373, "dur": 8.667, + "args": { + "External id": 2942675,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016279585.799, "dur": 52.654, + "args": { + "External id": 2942676,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 1336755, "tid": 1381158, + "ts": 1555016279797.993, "dur": 655.323, + "args": { + "External id": 2942677,"Record function id": 0, "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 1336755, "tid": 1381158, + "ts": 1555016279813.927, "dur": 627.656, + "args": { + "External id": 2942678,"Record function id": 0, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016279873.495, "dur": 4.179, + "args": { + "External id": 2942679,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016279891.698, "dur": 39.068, + "args": { + "External id": 2942680,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279897.815, "dur": 2.985, + "args": { + "External id": 2942681,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279902.462, "dur": 0.479, + "args": { + "External id": 2942682,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279903.488, "dur": 0.453, + "args": { + "External id": 2942683,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279909.249, "dur": 0.271, + "args": { + "External id": 2942684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279910.572, "dur": 0.446, + "args": { + "External id": 2942685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279911.749, "dur": 0.349, + "args": { + "External id": 2942686,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279917.138, "dur": 2.207, + "args": { + "External id": 2942687,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279920.260, "dur": 0.413, + "args": { + "External id": 2942688,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016279923.169, "dur": 1.864, + "args": { + "External id": 2942689,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016279954.568, "dur": 73.625, + "args": { + "External id": 2942690,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016280063.807, "dur": 138.313, + "args": { + "External id": 2942691,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016280074.187, "dur": 6.216, + "args": { + "External id": 2942692,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016280086.654, "dur": 12.135, + "args": { + "External id": 2942693,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016280090.849, "dur": 7.500, + "args": { + "External id": 2942694,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280095.994, "dur": 0.726, + "args": { + "External id": 2942695,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016280106.233, "dur": 32.686, + "args": { + "External id": 2942696,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280107.727, "dur": 0.344, + "args": { + "External id": 2942697,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280111.077, "dur": 2.514, + "args": { + "External id": 2942698,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280114.312, "dur": 0.249, + "args": { + "External id": 2942699,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280115.299, "dur": 2.192, + "args": { + "External id": 2942700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280122.341, "dur": 0.311, + "args": { + "External id": 2942701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280123.391, "dur": 0.343, + "args": { + "External id": 2942702,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280126.672, "dur": 0.589, + "args": { + "External id": 2942703,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280130.049, "dur": 0.386, + "args": { + "External id": 2942704,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016280131.237, "dur": 0.384, + "args": { + "External id": 2942705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016280170.660, "dur": 22.919, + "args": { + "External id": 2942706,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016280249.128, "dur": 124.288, + "args": { + "External id": 2942707,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016280280.969, "dur": 89.232, + "args": { + "External id": 2942708,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1779, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016280291.881, "dur": 74.484, + "args": { + "External id": 2942709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016280391.334, "dur": 1.905, + "args": { + "External id": 2942710,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1781, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016280470.480, "dur": 1740.871, + "args": { + "External id": 2942711,"Sequence number": 29294614, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1782 + } + }, + { + "ph": "f", "id": 174, "pid": 1336755, "tid": 1381158, "ts": 1555016280470.480, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016280587.314, "dur": 105.247, + "args": { + "External id": 2942712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016280738.838, "dur": 42.591, + "args": { + "External id": 2942713,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016280797.657, "dur": 50.012, + "args": { + "External id": 2942714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016280857.693, "dur": 31.468, + "args": { + "External id": 2942715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016280897.910, "dur": 44.741, + "args": { + "External id": 2942716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016280949.889, "dur": 26.855, + "args": { + "External id": 2942717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016280983.704, "dur": 86.709, + "args": { + "External id": 2942718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016281099.821, "dur": 26.794, + "args": { + "External id": 2942719,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016281166.768, "dur": 34.169, + "args": { + "External id": 2942720,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016281226.738, "dur": 20.983, + "args": { + "External id": 2942721,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016281265.665, "dur": 16.594, + "args": { + "External id": 2942722,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281290.615, "dur": 33.085, + "args": { + "External id": 2942723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281326.911, "dur": 34.526, + "args": { + "External id": 2942724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016281392.936, "dur": 175.373, + "args": { + "External id": 2942725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016281478.807, "dur": 5.489, + "args": { + "External id": 2942726,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016281486.207, "dur": 2.270, + "args": { + "External id": 2942727,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016281609.295, "dur": 27.113, + "args": { + "External id": 2942728,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016281650.217, "dur": 17.981, + "args": { + "External id": 2942729,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281675.774, "dur": 50.698, + "args": { + "External id": 2942730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281737.415, "dur": 42.058, + "args": { + "External id": 2942731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281792.264, "dur": 24.863, + "args": { + "External id": 2942732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281821.559, "dur": 29.759, + "args": { + "External id": 2942733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281856.498, "dur": 22.786, + "args": { + "External id": 2942734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016281885.487, "dur": 29.368, + "args": { + "External id": 2942735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016281940.353, "dur": 22.065, + "args": { + "External id": 2942736,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016281983.398, "dur": 67.174, + "args": { + "External id": 2942737,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016282071.311, "dur": 20.084, + "args": { + "External id": 2942738,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016282113.055, "dur": 15.096, + "args": { + "External id": 2942739,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016282156.873, "dur": 22.211, + "args": { + "External id": 2942740,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282257.687, "dur": 14.007, + "args": { + "External id": 2942741,"Record function id": 0, "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282260.901, "dur": 9.922, + "args": { + "External id": 2942742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282264.702, "dur": 5.279, + "args": { + "External id": 2942743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282266.161, "dur": 3.702, + "args": { + "External id": 2942744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282275.553, "dur": 4.820, + "args": { + "External id": 2942745,"Record function id": 0, "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282277.006, "dur": 2.897, + "args": { + "External id": 2942746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282277.948, "dur": 1.476, + "args": { + "External id": 2942747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282278.572, "dur": 0.761, + "args": { + "External id": 2942748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282283.706, "dur": 10.142, + "args": { + "External id": 2942749,"Record function id": 0, "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282284.774, "dur": 8.608, + "args": { + "External id": 2942750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282288.816, "dur": 3.999, + "args": { + "External id": 2942751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282289.476, "dur": 3.208, + "args": { + "External id": 2942752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282297.077, "dur": 4.328, + "args": { + "External id": 2942753,"Record function id": 0, "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282298.152, "dur": 2.844, + "args": { + "External id": 2942754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282298.917, "dur": 1.573, + "args": { + "External id": 2942755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282299.537, "dur": 0.869, + "args": { + "External id": 2942756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282304.452, "dur": 4.173, + "args": { + "External id": 2942757,"Record function id": 0, "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282305.775, "dur": 2.442, + "args": { + "External id": 2942758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282306.443, "dur": 1.189, + "args": { + "External id": 2942759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282306.996, "dur": 0.563, + "args": { + "External id": 2942760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282311.832, "dur": 5.556, + "args": { + "External id": 2942761,"Record function id": 0, "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282314.733, "dur": 2.206, + "args": { + "External id": 2942762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282315.403, "dur": 0.982, + "args": { + "External id": 2942763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282315.782, "dur": 0.539, + "args": { + "External id": 2942764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282320.644, "dur": 3.721, + "args": { + "External id": 2942765,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282321.718, "dur": 2.203, + "args": { + "External id": 2942766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282322.172, "dur": 1.201, + "args": { + "External id": 2942767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282322.527, "dur": 0.741, + "args": { + "External id": 2942768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282327.512, "dur": 5.650, + "args": { + "External id": 2942769,"Record function id": 0, "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282328.386, "dur": 4.363, + "args": { + "External id": 2942770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282328.868, "dur": 3.296, + "args": { + "External id": 2942771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282331.492, "dur": 0.600, + "args": { + "External id": 2942772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282336.252, "dur": 4.089, + "args": { + "External id": 2942773,"Record function id": 0, "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016282337.178, "dur": 2.765, + "args": { + "External id": 2942774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282337.802, "dur": 1.568, + "args": { + "External id": 2942775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016282338.305, "dur": 0.992, + "args": { + "External id": 2942776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016282347.261, "dur": 42835.560, + "args": { + "External id": 2942777,"Record function id": 0, "Sequence number": 29294613, "Fwd thread id": 1, "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016282348.683, "dur": 42823.569, + "args": { + "External id": 2942778,"Sequence number": 29294613, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1849 + } + }, + { + "ph": "f", "id": 175, "pid": 1336755, "tid": 1381158, "ts": 1555016282348.683, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.26)", "pid": 1336755, "tid": 1381158, + "ts": 1555016282376.389, "dur": 36.383, + "args": { + "External id": 2942779,"Record function id": 0, "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.26)", "pid": 1336755, "tid": 1381158, + "ts": 1555016282420.691, "dur": 65.210, + "args": { + "External id": 2942780,"Record function id": 0, "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.26)", "pid": 1336755, "tid": 1381158, + "ts": 1555016282493.537, "dur": 42669.758, + "args": { + "External id": 2942781,"Record function id": 0, "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016282579.618, "dur": 6.752, + "args": { + "External id": 2942782,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016282594.890, "dur": 6.237, + "args": { + "External id": 2942783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016282618.852, "dur": 41756.699, + "args": { + "External id": 2942784,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016282631.464, "dur": 41734.325, + "args": { + "External id": 2942785,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016282701.777, "dur": 12.573, + "args": { + "External id": 2942786,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016282724.385, "dur": 41603.729, + "args": { + "External id": 2942787,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016282726.689, "dur": 41600.551, + "args": { + "External id": 2942788,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016282730.833, "dur": 4.785, + "args": { + "External id": 2942789,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016282737.102, "dur": 41586.646, + "args": { + "External id": 2942790,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016324462.320, "dur": 8.052, + "args": { + "External id": 2942791,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016324464.912, "dur": 5.201, + "args": { + "External id": 2942792,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016324500.013, "dur": 338.303, + "args": { + "External id": 2942793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016324532.354, "dur": 301.564, + "args": { + "External id": 2942794,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1865, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016324546.303, "dur": 282.668, + "args": { + "External id": 2942795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016324856.651, "dur": 2.461, + "args": { + "External id": 2942796,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1867, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016324909.270, "dur": 5.910, + "args": { + "External id": 2942797,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016324954.228, "dur": 1.207, + "args": { + "External id": 2942798,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016324970.670, "dur": 3.832, + "args": { + "External id": 2942799,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325026.420, "dur": 1.620, + "args": { + "External id": 2942800,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325039.992, "dur": 0.827, + "args": { + "External id": 2942801,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325052.410, "dur": 1.147, + "args": { + "External id": 2942802,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325063.120, "dur": 3.920, + "args": { + "External id": 2942803,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325076.996, "dur": 3.505, + "args": { + "External id": 2942804,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325092.836, "dur": 0.856, + "args": { + "External id": 2942805,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016325198.423, "dur": 2759.006, + "args": { + "External id": 2942806,"Record function id": 0, "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 1336755, "tid": 1381158, + "ts": 1555016325217.295, "dur": 1062.117, + "args": { + "External id": 2942807,"Record function id": 0, "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 1336755, "tid": 1381158, + "ts": 1555016325231.924, "dur": 320.776, + "args": { + "External id": 2942808,"Record function id": 0, "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325318.389, "dur": 4.480, + "args": { + "External id": 2942809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325325.982, "dur": 1.196, + "args": { + "External id": 2942810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325328.903, "dur": 3.274, + "args": { + "External id": 2942811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325333.978, "dur": 1.131, + "args": { + "External id": 2942812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325338.332, "dur": 1.287, + "args": { + "External id": 2942813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325340.974, "dur": 1.080, + "args": { + "External id": 2942814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325345.609, "dur": 2.756, + "args": { + "External id": 2942815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325349.509, "dur": 1.136, + "args": { + "External id": 2942816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325354.069, "dur": 1.046, + "args": { + "External id": 2942817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016325356.678, "dur": 1.267, + "args": { + "External id": 2942818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016325377.734, "dur": 147.174, + "args": { + "External id": 2942819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016325395.199, "dur": 125.552, + "args": { + "External id": 2942820,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016325411.409, "dur": 15.196, + "args": { + "External id": 2942821,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016325430.237, "dur": 63.463, + "args": { + "External id": 2942822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016325434.627, "dur": 58.687, + "args": { + "External id": 2942823,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325437.781, "dur": 4.814, + "args": { + "External id": 2942824,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016325444.124, "dur": 48.607, + "args": { + "External id": 2942825,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 1336755, "tid": 1381158, + "ts": 1555016325639.525, "dur": 632.181, + "args": { + "External id": 2942826,"Record function id": 0, "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 1336755, "tid": 1381158, + "ts": 1555016325653.649, "dur": 605.438, + "args": { + "External id": 2942827,"Record function id": 0, "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016325714.022, "dur": 4.077, + "args": { + "External id": 2942828,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016325731.860, "dur": 38.062, + "args": { + "External id": 2942829,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325737.628, "dur": 1.589, + "args": { + "External id": 2942830,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325742.415, "dur": 0.345, + "args": { + "External id": 2942831,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325743.340, "dur": 0.503, + "args": { + "External id": 2942832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325745.976, "dur": 2.080, + "args": { + "External id": 2942833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325748.597, "dur": 0.527, + "args": { + "External id": 2942834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325751.697, "dur": 2.588, + "args": { + "External id": 2942835,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325756.634, "dur": 0.336, + "args": { + "External id": 2942836,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325757.478, "dur": 0.430, + "args": { + "External id": 2942837,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325760.516, "dur": 0.364, + "args": { + "External id": 2942838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016325778.489, "dur": 30.432, + "args": { + "External id": 2942839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016325841.504, "dur": 118.621, + "args": { + "External id": 2942840,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016325850.323, "dur": 3.068, + "args": { + "External id": 2942841,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016325858.539, "dur": 13.145, + "args": { + "External id": 2942842,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016325862.534, "dur": 8.724, + "args": { + "External id": 2942843,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325867.449, "dur": 2.227, + "args": { + "External id": 2942844,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016325878.242, "dur": 42.079, + "args": { + "External id": 2942845,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325879.849, "dur": 0.312, + "args": { + "External id": 2942846,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325882.829, "dur": 0.689, + "args": { + "External id": 2942847,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325896.333, "dur": 2.604, + "args": { + "External id": 2942848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325903.558, "dur": 0.274, + "args": { + "External id": 2942849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325904.329, "dur": 0.264, + "args": { + "External id": 2942850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325905.067, "dur": 2.242, + "args": { + "External id": 2942851,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325909.837, "dur": 0.456, + "args": { + "External id": 2942852,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325910.777, "dur": 0.394, + "args": { + "External id": 2942853,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016325913.190, "dur": 0.451, + "args": { + "External id": 2942854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016325930.949, "dur": 21.538, + "args": { + "External id": 2942855,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016326044.402, "dur": 140.453, + "args": { + "External id": 2942856,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016326075.597, "dur": 105.578, + "args": { + "External id": 2942857,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1928, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016326084.897, "dur": 91.815, + "args": { + "External id": 2942858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016326202.318, "dur": 2.025, + "args": { + "External id": 2942859,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1930, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016326286.812, "dur": 1644.739, + "args": { + "External id": 2942860,"Sequence number": 29294612, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1931 + } + }, + { + "ph": "f", "id": 176, "pid": 1336755, "tid": 1381158, "ts": 1555016326286.812, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016326399.456, "dur": 104.830, + "args": { + "External id": 2942861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016326544.911, "dur": 41.561, + "args": { + "External id": 2942862,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016326602.755, "dur": 48.004, + "args": { + "External id": 2942863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016326661.766, "dur": 30.256, + "args": { + "External id": 2942864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016326698.417, "dur": 44.194, + "args": { + "External id": 2942865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016326749.674, "dur": 27.076, + "args": { + "External id": 2942866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016326792.859, "dur": 41.030, + "args": { + "External id": 2942867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016326864.453, "dur": 26.270, + "args": { + "External id": 2942868,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016326913.298, "dur": 29.790, + "args": { + "External id": 2942869,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016326968.944, "dur": 60.377, + "args": { + "External id": 2942870,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016327052.600, "dur": 16.029, + "args": { + "External id": 2942871,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327079.024, "dur": 35.009, + "args": { + "External id": 2942872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327117.559, "dur": 48.226, + "args": { + "External id": 2942873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016327198.450, "dur": 175.368, + "args": { + "External id": 2942874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016327279.916, "dur": 8.903, + "args": { + "External id": 2942875,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016327290.641, "dur": 2.429, + "args": { + "External id": 2942876,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016327412.172, "dur": 25.529, + "args": { + "External id": 2942877,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016327454.522, "dur": 16.074, + "args": { + "External id": 2942878,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327477.421, "dur": 39.112, + "args": { + "External id": 2942879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327521.794, "dur": 35.679, + "args": { + "External id": 2942880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327565.735, "dur": 21.853, + "args": { + "External id": 2942881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327591.381, "dur": 29.776, + "args": { + "External id": 2942882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327641.971, "dur": 30.372, + "args": { + "External id": 2942883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016327680.290, "dur": 31.098, + "args": { + "External id": 2942884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016327735.747, "dur": 25.588, + "args": { + "External id": 2942885,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016327780.075, "dur": 25.501, + "args": { + "External id": 2942886,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016327822.959, "dur": 19.295, + "args": { + "External id": 2942887,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016327858.774, "dur": 16.878, + "args": { + "External id": 2942888,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016327889.890, "dur": 16.702, + "args": { + "External id": 2942889,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016327977.925, "dur": 54.151, + "args": { + "External id": 2942890,"Record function id": 0, "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328018.789, "dur": 11.776, + "args": { + "External id": 2942891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328023.359, "dur": 5.754, + "args": { + "External id": 2942892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328024.809, "dur": 4.027, + "args": { + "External id": 2942893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328038.308, "dur": 9.671, + "args": { + "External id": 2942894,"Record function id": 0, "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328042.220, "dur": 5.138, + "args": { + "External id": 2942895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328043.220, "dur": 3.691, + "args": { + "External id": 2942896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328043.743, "dur": 3.084, + "args": { + "External id": 2942897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328051.489, "dur": 4.290, + "args": { + "External id": 2942898,"Record function id": 0, "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328052.643, "dur": 2.729, + "args": { + "External id": 2942899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328053.165, "dur": 1.782, + "args": { + "External id": 2942900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328053.727, "dur": 1.140, + "args": { + "External id": 2942901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328059.090, "dur": 4.312, + "args": { + "External id": 2942902,"Record function id": 0, "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328060.240, "dur": 2.769, + "args": { + "External id": 2942903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328061.044, "dur": 1.423, + "args": { + "External id": 2942904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328061.660, "dur": 0.707, + "args": { + "External id": 2942905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328066.704, "dur": 3.674, + "args": { + "External id": 2942906,"Record function id": 0, "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328067.567, "dur": 2.399, + "args": { + "External id": 2942907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328068.253, "dur": 1.282, + "args": { + "External id": 2942908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328068.554, "dur": 0.917, + "args": { + "External id": 2942909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328073.518, "dur": 3.896, + "args": { + "External id": 2942910,"Record function id": 0, "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328074.756, "dur": 2.226, + "args": { + "External id": 2942911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328075.254, "dur": 1.208, + "args": { + "External id": 2942912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328075.739, "dur": 0.618, + "args": { + "External id": 2942913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328080.644, "dur": 6.345, + "args": { + "External id": 2942914,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328081.822, "dur": 4.727, + "args": { + "External id": 2942915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328082.808, "dur": 3.316, + "args": { + "External id": 2942916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328085.370, "dur": 0.692, + "args": { + "External id": 2942917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328090.148, "dur": 3.594, + "args": { + "External id": 2942918,"Record function id": 0, "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328091.328, "dur": 1.998, + "args": { + "External id": 2942919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328091.846, "dur": 0.928, + "args": { + "External id": 2942920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328092.132, "dur": 0.578, + "args": { + "External id": 2942921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328096.725, "dur": 5.918, + "args": { + "External id": 2942922,"Record function id": 0, "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016328097.851, "dur": 4.340, + "args": { + "External id": 2942923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328098.362, "dur": 3.247, + "args": { + "External id": 2942924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016328098.629, "dur": 2.913, + "args": { + "External id": 2942925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016328106.342, "dur": 40151.071, + "args": { + "External id": 2942926,"Record function id": 0, "Sequence number": 29294611, "Fwd thread id": 1, "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016328107.860, "dur": 40140.112, + "args": { + "External id": 2942927,"Sequence number": 29294611, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1998 + } + }, + { + "ph": "f", "id": 177, "pid": 1336755, "tid": 1381158, "ts": 1555016328107.860, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 1336755, "tid": 1381158, + "ts": 1555016328137.727, "dur": 50.737, + "args": { + "External id": 2942928,"Record function id": 0, "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 1336755, "tid": 1381158, + "ts": 1555016328197.284, "dur": 64.953, + "args": { + "External id": 2942929,"Record function id": 0, "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 1336755, "tid": 1381158, + "ts": 1555016328272.317, "dur": 39967.820, + "args": { + "External id": 2942930,"Record function id": 0, "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016328357.124, "dur": 6.732, + "args": { + "External id": 2942931,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016328374.858, "dur": 4.965, + "args": { + "External id": 2942932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016328392.849, "dur": 38989.443, + "args": { + "External id": 2942933,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016328405.688, "dur": 38966.969, + "args": { + "External id": 2942934,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016328467.001, "dur": 13.135, + "args": { + "External id": 2942935,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016328487.344, "dur": 38845.104, + "args": { + "External id": 2942936,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016328489.761, "dur": 38841.850, + "args": { + "External id": 2942937,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016328492.834, "dur": 5.892, + "args": { + "External id": 2942938,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016328500.401, "dur": 38827.612, + "args": { + "External id": 2942939,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016367468.655, "dur": 8.613, + "args": { + "External id": 2942940,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016367471.420, "dur": 5.359, + "args": { + "External id": 2942941,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016367504.176, "dur": 392.569, + "args": { + "External id": 2942942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016367536.786, "dur": 355.607, + "args": { + "External id": 2942943,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2014, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016367551.465, "dur": 335.695, + "args": { + "External id": 2942944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016367919.134, "dur": 2.315, + "args": { + "External id": 2942945,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2016, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016367975.637, "dur": 38.237, + "args": { + "External id": 2942946,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368060.353, "dur": 3.873, + "args": { + "External id": 2942947,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368079.512, "dur": 1.218, + "args": { + "External id": 2942948,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368095.194, "dur": 0.718, + "args": { + "External id": 2942949,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368106.906, "dur": 0.847, + "args": { + "External id": 2942950,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368117.027, "dur": 2.961, + "args": { + "External id": 2942951,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368131.325, "dur": 1.076, + "args": { + "External id": 2942952,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368156.926, "dur": 3.527, + "args": { + "External id": 2942953,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368175.509, "dur": 0.993, + "args": { + "External id": 2942954,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016368272.801, "dur": 2795.705, + "args": { + "External id": 2942955,"Record function id": 0, "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 1336755, "tid": 1381158, + "ts": 1555016368292.369, "dur": 1074.743, + "args": { + "External id": 2942956,"Record function id": 0, "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 1336755, "tid": 1381158, + "ts": 1555016368307.662, "dur": 321.688, + "args": { + "External id": 2942957,"Record function id": 0, "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368391.066, "dur": 5.823, + "args": { + "External id": 2942958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368399.817, "dur": 1.134, + "args": { + "External id": 2942959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368402.506, "dur": 1.258, + "args": { + "External id": 2942960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368405.445, "dur": 0.852, + "args": { + "External id": 2942961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368409.988, "dur": 0.920, + "args": { + "External id": 2942962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368412.256, "dur": 0.897, + "args": { + "External id": 2942963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368414.641, "dur": 3.232, + "args": { + "External id": 2942964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368419.100, "dur": 1.024, + "args": { + "External id": 2942965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368423.834, "dur": 2.796, + "args": { + "External id": 2942966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016368427.934, "dur": 0.871, + "args": { + "External id": 2942967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016368445.563, "dur": 156.330, + "args": { + "External id": 2942968,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016368463.268, "dur": 131.156, + "args": { + "External id": 2942969,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016368483.639, "dur": 11.862, + "args": { + "External id": 2942970,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016368498.775, "dur": 66.992, + "args": { + "External id": 2942971,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016368503.538, "dur": 61.901, + "args": { + "External id": 2942972,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368506.806, "dur": 7.397, + "args": { + "External id": 2942973,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016368515.622, "dur": 49.043, + "args": { + "External id": 2942974,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 1336755, "tid": 1381158, + "ts": 1555016368713.622, "dur": 645.354, + "args": { + "External id": 2942975,"Record function id": 0, "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 1336755, "tid": 1381158, + "ts": 1555016368729.869, "dur": 617.653, + "args": { + "External id": 2942976,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016368811.070, "dur": 4.708, + "args": { + "External id": 2942977,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016368830.097, "dur": 33.299, + "args": { + "External id": 2942978,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368835.713, "dur": 3.287, + "args": { + "External id": 2942979,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368840.121, "dur": 0.239, + "args": { + "External id": 2942980,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368840.885, "dur": 0.305, + "args": { + "External id": 2942981,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368844.566, "dur": 2.117, + "args": { + "External id": 2942982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368847.362, "dur": 0.483, + "args": { + "External id": 2942983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368848.338, "dur": 0.293, + "args": { + "External id": 2942984,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368851.562, "dur": 0.311, + "args": { + "External id": 2942985,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368853.374, "dur": 0.391, + "args": { + "External id": 2942986,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368855.249, "dur": 1.929, + "args": { + "External id": 2942987,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016368873.771, "dur": 34.735, + "args": { + "External id": 2942988,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016368937.503, "dur": 153.247, + "args": { + "External id": 2942989,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016368946.566, "dur": 3.243, + "args": { + "External id": 2942990,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016368955.289, "dur": 9.269, + "args": { + "External id": 2942991,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016368959.054, "dur": 5.093, + "args": { + "External id": 2942992,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368962.420, "dur": 0.507, + "args": { + "External id": 2942993,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016368971.653, "dur": 71.424, + "args": { + "External id": 2942994,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368973.782, "dur": 2.498, + "args": { + "External id": 2942995,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368977.993, "dur": 0.415, + "args": { + "External id": 2942996,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016368979.915, "dur": 38.478, + "args": { + "External id": 2942997,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016369021.786, "dur": 0.458, + "args": { + "External id": 2942998,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016369023.802, "dur": 0.439, + "args": { + "External id": 2942999,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016369027.194, "dur": 0.338, + "args": { + "External id": 2943000,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016369029.219, "dur": 0.248, + "args": { + "External id": 2943001,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016369030.768, "dur": 0.315, + "args": { + "External id": 2943002,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016369034.464, "dur": 2.114, + "args": { + "External id": 2943003,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016369057.844, "dur": 24.558, + "args": { + "External id": 2943004,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016369136.393, "dur": 138.885, + "args": { + "External id": 2943005,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016369182.696, "dur": 88.751, + "args": { + "External id": 2943006,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2077, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016369193.129, "dur": 74.035, + "args": { + "External id": 2943007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016369293.270, "dur": 1.754, + "args": { + "External id": 2943008,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2079, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016369374.911, "dur": 1671.480, + "args": { + "External id": 2943009,"Sequence number": 29294610, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2080 + } + }, + { + "ph": "f", "id": 178, "pid": 1336755, "tid": 1381158, "ts": 1555016369374.911, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016369482.687, "dur": 101.702, + "args": { + "External id": 2943010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016369625.435, "dur": 40.226, + "args": { + "External id": 2943011,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016369684.830, "dur": 49.787, + "args": { + "External id": 2943012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016369743.670, "dur": 32.149, + "args": { + "External id": 2943013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016369781.767, "dur": 44.912, + "args": { + "External id": 2943014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016369833.802, "dur": 29.973, + "args": { + "External id": 2943015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016369873.818, "dur": 44.087, + "args": { + "External id": 2943016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016369941.805, "dur": 24.820, + "args": { + "External id": 2943017,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016370025.319, "dur": 34.520, + "args": { + "External id": 2943018,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016370084.624, "dur": 22.435, + "args": { + "External id": 2943019,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016370122.909, "dur": 15.506, + "args": { + "External id": 2943020,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370165.595, "dur": 39.187, + "args": { + "External id": 2943021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370208.448, "dur": 38.369, + "args": { + "External id": 2943022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016370283.264, "dur": 172.604, + "args": { + "External id": 2943023,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016370365.246, "dur": 5.835, + "args": { + "External id": 2943024,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016370372.837, "dur": 2.538, + "args": { + "External id": 2943025,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016370492.092, "dur": 25.177, + "args": { + "External id": 2943026,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016370530.045, "dur": 16.609, + "args": { + "External id": 2943027,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370553.761, "dur": 36.689, + "args": { + "External id": 2943028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370598.436, "dur": 35.570, + "args": { + "External id": 2943029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370641.470, "dur": 21.332, + "args": { + "External id": 2943030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370667.128, "dur": 29.850, + "args": { + "External id": 2943031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370702.653, "dur": 34.326, + "args": { + "External id": 2943032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016370751.196, "dur": 36.815, + "args": { + "External id": 2943033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016370808.550, "dur": 25.400, + "args": { + "External id": 2943034,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016370854.237, "dur": 26.379, + "args": { + "External id": 2943035,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016370896.412, "dur": 17.840, + "args": { + "External id": 2943036,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016370935.732, "dur": 14.917, + "args": { + "External id": 2943037,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016370962.873, "dur": 18.993, + "args": { + "External id": 2943038,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371090.330, "dur": 17.355, + "args": { + "External id": 2943039,"Record function id": 0, "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371094.256, "dur": 12.488, + "args": { + "External id": 2943040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371098.472, "dur": 7.247, + "args": { + "External id": 2943041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371100.003, "dur": 5.588, + "args": { + "External id": 2943042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371111.515, "dur": 5.391, + "args": { + "External id": 2943043,"Record function id": 0, "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371113.634, "dur": 2.742, + "args": { + "External id": 2943044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371114.379, "dur": 1.593, + "args": { + "External id": 2943045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371114.865, "dur": 1.019, + "args": { + "External id": 2943046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371120.205, "dur": 4.206, + "args": { + "External id": 2943047,"Record function id": 0, "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371121.617, "dur": 2.382, + "args": { + "External id": 2943048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371122.228, "dur": 1.366, + "args": { + "External id": 2943049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371122.740, "dur": 0.778, + "args": { + "External id": 2943050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371127.502, "dur": 4.694, + "args": { + "External id": 2943051,"Record function id": 0, "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371129.507, "dur": 2.263, + "args": { + "External id": 2943052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371130.036, "dur": 1.205, + "args": { + "External id": 2943053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371130.508, "dur": 0.667, + "args": { + "External id": 2943054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371135.454, "dur": 4.146, + "args": { + "External id": 2943055,"Record function id": 0, "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371136.581, "dur": 2.613, + "args": { + "External id": 2943056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371137.099, "dur": 1.364, + "args": { + "External id": 2943057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371137.388, "dur": 1.002, + "args": { + "External id": 2943058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371157.062, "dur": 6.850, + "args": { + "External id": 2943059,"Record function id": 0, "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371158.994, "dur": 4.262, + "args": { + "External id": 2943060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371160.026, "dur": 2.422, + "args": { + "External id": 2943061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371160.554, "dur": 1.692, + "args": { + "External id": 2943062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371167.620, "dur": 5.918, + "args": { + "External id": 2943063,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371168.759, "dur": 4.362, + "args": { + "External id": 2943064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371169.434, "dur": 3.106, + "args": { + "External id": 2943065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371171.776, "dur": 0.655, + "args": { + "External id": 2943066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371176.716, "dur": 6.222, + "args": { + "External id": 2943067,"Record function id": 0, "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371178.085, "dur": 4.425, + "args": { + "External id": 2943068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371178.810, "dur": 3.161, + "args": { + "External id": 2943069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371179.116, "dur": 2.794, + "args": { + "External id": 2943070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371186.080, "dur": 4.610, + "args": { + "External id": 2943071,"Record function id": 0, "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016371187.477, "dur": 2.780, + "args": { + "External id": 2943072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371188.479, "dur": 1.228, + "args": { + "External id": 2943073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016371188.854, "dur": 0.777, + "args": { + "External id": 2943074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016371194.242, "dur": 38827.228, + "args": { + "External id": 2943075,"Record function id": 0, "Sequence number": 29294609, "Fwd thread id": 1, "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016371195.741, "dur": 38781.933, + "args": { + "External id": 2943076,"Sequence number": 29294609, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2147 + } + }, + { + "ph": "f", "id": 179, "pid": 1336755, "tid": 1381158, "ts": 1555016371195.741, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 1336755, "tid": 1381158, + "ts": 1555016371223.478, "dur": 39.319, + "args": { + "External id": 2943077,"Record function id": 0, "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 1336755, "tid": 1381158, + "ts": 1555016371271.115, "dur": 68.635, + "args": { + "External id": 2943078,"Record function id": 0, "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 1336755, "tid": 1381158, + "ts": 1555016371347.757, "dur": 38622.342, + "args": { + "External id": 2943079,"Record function id": 0, "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016371435.630, "dur": 6.603, + "args": { + "External id": 2943080,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016371451.175, "dur": 4.475, + "args": { + "External id": 2943081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016371469.074, "dur": 37749.209, + "args": { + "External id": 2943082,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016371485.838, "dur": 37724.373, + "args": { + "External id": 2943083,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016371542.832, "dur": 12.761, + "args": { + "External id": 2943084,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016371561.857, "dur": 37607.835, + "args": { + "External id": 2943085,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016371566.428, "dur": 37602.581, + "args": { + "External id": 2943086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016371570.353, "dur": 4.280, + "args": { + "External id": 2943087,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016371576.408, "dur": 37589.029, + "args": { + "External id": 2943088,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016409307.337, "dur": 8.567, + "args": { + "External id": 2943089,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016409310.357, "dur": 5.173, + "args": { + "External id": 2943090,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016409348.774, "dur": 330.763, + "args": { + "External id": 2943091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016409379.763, "dur": 295.089, + "args": { + "External id": 2943092,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2163, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016409390.268, "dur": 279.407, + "args": { + "External id": 2943093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016409701.681, "dur": 2.333, + "args": { + "External id": 2943094,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2165, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409756.592, "dur": 5.781, + "args": { + "External id": 2943095,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409807.001, "dur": 1.238, + "args": { + "External id": 2943096,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409824.956, "dur": 1.554, + "args": { + "External id": 2943097,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409839.563, "dur": 1.183, + "args": { + "External id": 2943098,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409854.042, "dur": 0.884, + "args": { + "External id": 2943099,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409867.649, "dur": 0.988, + "args": { + "External id": 2943100,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409879.509, "dur": 0.854, + "args": { + "External id": 2943101,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409893.054, "dur": 3.541, + "args": { + "External id": 2943102,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016409909.788, "dur": 0.755, + "args": { + "External id": 2943103,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016410037.831, "dur": 2716.636, + "args": { + "External id": 2943104,"Record function id": 0, "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 1336755, "tid": 1381158, + "ts": 1555016410058.528, "dur": 1033.040, + "args": { + "External id": 2943105,"Record function id": 0, "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 1336755, "tid": 1381158, + "ts": 1555016410072.519, "dur": 341.727, + "args": { + "External id": 2943106,"Record function id": 0, "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410177.157, "dur": 4.767, + "args": { + "External id": 2943107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410185.539, "dur": 1.334, + "args": { + "External id": 2943108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410188.860, "dur": 0.918, + "args": { + "External id": 2943109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410191.485, "dur": 1.114, + "args": { + "External id": 2943110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410195.942, "dur": 0.825, + "args": { + "External id": 2943111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410197.982, "dur": 1.243, + "args": { + "External id": 2943112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410200.713, "dur": 1.836, + "args": { + "External id": 2943113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410203.792, "dur": 3.030, + "args": { + "External id": 2943114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410210.296, "dur": 0.938, + "args": { + "External id": 2943115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016410212.679, "dur": 0.767, + "args": { + "External id": 2943116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016410231.340, "dur": 150.992, + "args": { + "External id": 2943117,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016410247.582, "dur": 130.160, + "args": { + "External id": 2943118,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016410266.639, "dur": 13.507, + "args": { + "External id": 2943119,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016410283.277, "dur": 66.357, + "args": { + "External id": 2943120,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016410285.568, "dur": 63.672, + "args": { + "External id": 2943121,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410289.589, "dur": 5.417, + "args": { + "External id": 2943122,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016410298.805, "dur": 49.871, + "args": { + "External id": 2943123,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 1336755, "tid": 1381158, + "ts": 1555016410509.355, "dur": 575.264, + "args": { + "External id": 2943124,"Record function id": 0, "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 1336755, "tid": 1381158, + "ts": 1555016410526.031, "dur": 546.761, + "args": { + "External id": 2943125,"Record function id": 0, "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016410586.935, "dur": 4.173, + "args": { + "External id": 2943126,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016410605.488, "dur": 33.042, + "args": { + "External id": 2943127,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410610.143, "dur": 1.573, + "args": { + "External id": 2943128,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410615.422, "dur": 0.437, + "args": { + "External id": 2943129,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410617.340, "dur": 3.004, + "args": { + "External id": 2943130,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410621.793, "dur": 1.613, + "args": { + "External id": 2943131,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410625.130, "dur": 0.417, + "args": { + "External id": 2943132,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410626.813, "dur": 0.300, + "args": { + "External id": 2943133,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410629.956, "dur": 0.346, + "args": { + "External id": 2943134,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410631.569, "dur": 0.290, + "args": { + "External id": 2943135,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410633.483, "dur": 0.289, + "args": { + "External id": 2943136,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016410648.476, "dur": 28.126, + "args": { + "External id": 2943137,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016410705.609, "dur": 99.055, + "args": { + "External id": 2943138,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016410714.570, "dur": 3.582, + "args": { + "External id": 2943139,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016410723.246, "dur": 12.847, + "args": { + "External id": 2943140,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016410727.008, "dur": 8.633, + "args": { + "External id": 2943141,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410730.980, "dur": 3.493, + "args": { + "External id": 2943142,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016410742.438, "dur": 27.126, + "args": { + "External id": 2943143,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410744.489, "dur": 0.478, + "args": { + "External id": 2943144,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410746.169, "dur": 0.622, + "args": { + "External id": 2943145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410748.176, "dur": 0.581, + "args": { + "External id": 2943146,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410751.390, "dur": 0.354, + "args": { + "External id": 2943147,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410753.125, "dur": 0.610, + "args": { + "External id": 2943148,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410755.105, "dur": 1.440, + "args": { + "External id": 2943149,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410758.055, "dur": 0.498, + "args": { + "External id": 2943150,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410759.938, "dur": 2.116, + "args": { + "External id": 2943151,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016410765.280, "dur": 0.346, + "args": { + "External id": 2943152,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016410780.225, "dur": 17.332, + "args": { + "External id": 2943153,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016410846.037, "dur": 112.985, + "args": { + "External id": 2943154,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016410874.524, "dur": 81.411, + "args": { + "External id": 2943155,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2226, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016410883.885, "dur": 68.011, + "args": { + "External id": 2943156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016410977.785, "dur": 1.724, + "args": { + "External id": 2943157,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2228, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016411098.246, "dur": 1629.430, + "args": { + "External id": 2943158,"Sequence number": 29294608, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2229 + } + }, + { + "ph": "f", "id": 180, "pid": 1336755, "tid": 1381158, "ts": 1555016411098.246, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411228.060, "dur": 105.454, + "args": { + "External id": 2943159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016411374.778, "dur": 40.016, + "args": { + "External id": 2943160,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411434.019, "dur": 49.081, + "args": { + "External id": 2943161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411492.553, "dur": 31.730, + "args": { + "External id": 2943162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411530.736, "dur": 45.228, + "args": { + "External id": 2943163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411585.004, "dur": 26.941, + "args": { + "External id": 2943164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411621.781, "dur": 41.774, + "args": { + "External id": 2943165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016411687.349, "dur": 25.636, + "args": { + "External id": 2943166,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016411730.799, "dur": 28.371, + "args": { + "External id": 2943167,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016411781.983, "dur": 20.007, + "args": { + "External id": 2943168,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016411815.460, "dur": 15.541, + "args": { + "External id": 2943169,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411841.774, "dur": 28.845, + "args": { + "External id": 2943170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016411873.433, "dur": 33.031, + "args": { + "External id": 2943171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016411931.448, "dur": 208.972, + "args": { + "External id": 2943172,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016412042.729, "dur": 8.226, + "args": { + "External id": 2943173,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016412053.054, "dur": 2.381, + "args": { + "External id": 2943174,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016412194.111, "dur": 26.846, + "args": { + "External id": 2943175,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016412235.911, "dur": 16.179, + "args": { + "External id": 2943176,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016412262.457, "dur": 42.069, + "args": { + "External id": 2943177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016412310.021, "dur": 37.092, + "args": { + "External id": 2943178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016412362.140, "dur": 21.388, + "args": { + "External id": 2943179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016412387.804, "dur": 30.390, + "args": { + "External id": 2943180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016412426.824, "dur": 21.174, + "args": { + "External id": 2943181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016412455.692, "dur": 41.930, + "args": { + "External id": 2943182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016412524.681, "dur": 24.708, + "args": { + "External id": 2943183,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016412568.912, "dur": 26.033, + "args": { + "External id": 2943184,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016412612.702, "dur": 21.405, + "args": { + "External id": 2943185,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016412651.682, "dur": 14.443, + "args": { + "External id": 2943186,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016412678.370, "dur": 19.012, + "args": { + "External id": 2943187,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412775.653, "dur": 14.297, + "args": { + "External id": 2943188,"Record function id": 0, "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412778.985, "dur": 9.923, + "args": { + "External id": 2943189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412782.844, "dur": 5.178, + "args": { + "External id": 2943190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412784.349, "dur": 3.581, + "args": { + "External id": 2943191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412793.898, "dur": 8.070, + "args": { + "External id": 2943192,"Record function id": 0, "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412795.213, "dur": 6.249, + "args": { + "External id": 2943193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412799.224, "dur": 1.553, + "args": { + "External id": 2943194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412799.618, "dur": 1.000, + "args": { + "External id": 2943195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412805.432, "dur": 4.919, + "args": { + "External id": 2943196,"Record function id": 0, "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412806.897, "dur": 3.027, + "args": { + "External id": 2943197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412807.938, "dur": 1.572, + "args": { + "External id": 2943198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412808.826, "dur": 0.585, + "args": { + "External id": 2943199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412813.656, "dur": 4.470, + "args": { + "External id": 2943200,"Record function id": 0, "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412814.998, "dur": 2.665, + "args": { + "External id": 2943201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412816.126, "dur": 1.044, + "args": { + "External id": 2943202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412816.442, "dur": 0.631, + "args": { + "External id": 2943203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412821.150, "dur": 3.749, + "args": { + "External id": 2943204,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412822.203, "dur": 2.274, + "args": { + "External id": 2943205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412823.033, "dur": 1.013, + "args": { + "External id": 2943206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412823.316, "dur": 0.657, + "args": { + "External id": 2943207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412827.963, "dur": 4.573, + "args": { + "External id": 2943208,"Record function id": 0, "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412829.376, "dur": 2.727, + "args": { + "External id": 2943209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412830.228, "dur": 1.237, + "args": { + "External id": 2943210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412830.785, "dur": 0.613, + "args": { + "External id": 2943211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412835.777, "dur": 5.753, + "args": { + "External id": 2943212,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412836.931, "dur": 4.185, + "args": { + "External id": 2943213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412837.422, "dur": 3.099, + "args": { + "External id": 2943214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412837.676, "dur": 2.757, + "args": { + "External id": 2943215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412844.502, "dur": 6.635, + "args": { + "External id": 2943216,"Record function id": 0, "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412845.772, "dur": 4.925, + "args": { + "External id": 2943217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412846.697, "dur": 3.461, + "args": { + "External id": 2943218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412849.175, "dur": 0.911, + "args": { + "External id": 2943219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412857.650, "dur": 3.509, + "args": { + "External id": 2943220,"Record function id": 0, "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016412858.641, "dur": 2.122, + "args": { + "External id": 2943221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412859.112, "dur": 1.249, + "args": { + "External id": 2943222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016412859.621, "dur": 0.666, + "args": { + "External id": 2943223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016412864.559, "dur": 39075.645, + "args": { + "External id": 2943224,"Record function id": 0, "Sequence number": 29294607, "Fwd thread id": 1, "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016412865.945, "dur": 39066.095, + "args": { + "External id": 2943225,"Sequence number": 29294607, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2296 + } + }, + { + "ph": "f", "id": 181, "pid": 1336755, "tid": 1381158, "ts": 1555016412865.945, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 1336755, "tid": 1381158, + "ts": 1555016412894.108, "dur": 37.517, + "args": { + "External id": 2943226,"Record function id": 0, "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 1336755, "tid": 1381158, + "ts": 1555016412938.658, "dur": 111.105, + "args": { + "External id": 2943227,"Record function id": 0, "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 1336755, "tid": 1381158, + "ts": 1555016413057.159, "dur": 38867.446, + "args": { + "External id": 2943228,"Record function id": 0, "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016413157.748, "dur": 7.644, + "args": { + "External id": 2943229,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016413177.354, "dur": 4.882, + "args": { + "External id": 2943230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016413199.237, "dur": 37911.181, + "args": { + "External id": 2943231,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016413212.958, "dur": 37889.080, + "args": { + "External id": 2943232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016413263.756, "dur": 13.061, + "args": { + "External id": 2943233,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016413282.849, "dur": 37774.994, + "args": { + "External id": 2943234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016413286.343, "dur": 37770.903, + "args": { + "External id": 2943235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016413290.162, "dur": 4.367, + "args": { + "External id": 2943236,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016413295.987, "dur": 37757.665, + "args": { + "External id": 2943237,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016451209.496, "dur": 7.979, + "args": { + "External id": 2943238,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016451212.287, "dur": 4.669, + "args": { + "External id": 2943239,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016451245.882, "dur": 378.488, + "args": { + "External id": 2943240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016451277.288, "dur": 342.482, + "args": { + "External id": 2943241,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2312, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016451289.132, "dur": 325.550, + "args": { + "External id": 2943242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016451647.261, "dur": 2.030, + "args": { + "External id": 2943243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2314, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451702.778, "dur": 11.124, + "args": { + "External id": 2943244,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451756.441, "dur": 1.458, + "args": { + "External id": 2943245,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451777.622, "dur": 1.461, + "args": { + "External id": 2943246,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451793.164, "dur": 0.974, + "args": { + "External id": 2943247,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451807.395, "dur": 2.928, + "args": { + "External id": 2943248,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451821.730, "dur": 0.990, + "args": { + "External id": 2943249,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451835.883, "dur": 1.247, + "args": { + "External id": 2943250,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451849.815, "dur": 2.194, + "args": { + "External id": 2943251,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016451865.009, "dur": 2.675, + "args": { + "External id": 2943252,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016451953.779, "dur": 2789.788, + "args": { + "External id": 2943253,"Record function id": 0, "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 1336755, "tid": 1381158, + "ts": 1555016451971.550, "dur": 1071.292, + "args": { + "External id": 2943254,"Record function id": 0, "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 1336755, "tid": 1381158, + "ts": 1555016452023.208, "dur": 345.882, + "args": { + "External id": 2943255,"Record function id": 0, "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452112.800, "dur": 4.482, + "args": { + "External id": 2943256,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452120.319, "dur": 1.357, + "args": { + "External id": 2943257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452123.484, "dur": 0.949, + "args": { + "External id": 2943258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452126.361, "dur": 0.798, + "args": { + "External id": 2943259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452130.515, "dur": 1.195, + "args": { + "External id": 2943260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452132.963, "dur": 0.998, + "args": { + "External id": 2943261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452135.658, "dur": 3.712, + "args": { + "External id": 2943262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452141.052, "dur": 14.439, + "args": { + "External id": 2943263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452161.121, "dur": 0.917, + "args": { + "External id": 2943264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016452163.544, "dur": 0.772, + "args": { + "External id": 2943265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016452182.160, "dur": 153.534, + "args": { + "External id": 2943266,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016452198.433, "dur": 132.757, + "args": { + "External id": 2943267,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016452219.479, "dur": 12.112, + "args": { + "External id": 2943268,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016452234.854, "dur": 67.504, + "args": { + "External id": 2943269,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016452237.657, "dur": 64.368, + "args": { + "External id": 2943270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452242.088, "dur": 6.672, + "args": { + "External id": 2943271,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016452250.842, "dur": 50.647, + "args": { + "External id": 2943272,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 1336755, "tid": 1381158, + "ts": 1555016452461.185, "dur": 574.070, + "args": { + "External id": 2943273,"Record function id": 0, "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 1336755, "tid": 1381158, + "ts": 1555016452476.872, "dur": 542.201, + "args": { + "External id": 2943274,"Record function id": 0, "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016452538.528, "dur": 4.583, + "args": { + "External id": 2943275,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016452557.667, "dur": 31.326, + "args": { + "External id": 2943276,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452562.542, "dur": 1.653, + "args": { + "External id": 2943277,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452566.565, "dur": 2.515, + "args": { + "External id": 2943278,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452570.948, "dur": 0.438, + "args": { + "External id": 2943279,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452573.360, "dur": 0.389, + "args": { + "External id": 2943280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452575.366, "dur": 0.323, + "args": { + "External id": 2943281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452577.473, "dur": 0.666, + "args": { + "External id": 2943282,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452579.839, "dur": 0.412, + "args": { + "External id": 2943283,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452582.520, "dur": 0.409, + "args": { + "External id": 2943284,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452584.732, "dur": 0.348, + "args": { + "External id": 2943285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016452602.690, "dur": 34.206, + "args": { + "External id": 2943286,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016452669.701, "dur": 103.335, + "args": { + "External id": 2943287,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016452679.406, "dur": 5.037, + "args": { + "External id": 2943288,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016452690.279, "dur": 9.898, + "args": { + "External id": 2943289,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016452694.228, "dur": 5.511, + "args": { + "External id": 2943290,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452698.162, "dur": 0.510, + "args": { + "External id": 2943291,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016452707.274, "dur": 28.747, + "args": { + "External id": 2943292,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452710.057, "dur": 0.816, + "args": { + "External id": 2943293,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452712.898, "dur": 0.424, + "args": { + "External id": 2943294,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452714.810, "dur": 0.711, + "args": { + "External id": 2943295,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452717.500, "dur": 0.600, + "args": { + "External id": 2943296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452720.048, "dur": 0.332, + "args": { + "External id": 2943297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452722.201, "dur": 0.420, + "args": { + "External id": 2943298,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452724.921, "dur": 2.796, + "args": { + "External id": 2943299,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452729.850, "dur": 0.371, + "args": { + "External id": 2943300,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016452731.916, "dur": 0.355, + "args": { + "External id": 2943301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016452747.544, "dur": 18.561, + "args": { + "External id": 2943302,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016452814.739, "dur": 106.808, + "args": { + "External id": 2943303,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016452836.157, "dur": 82.156, + "args": { + "External id": 2943304,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2375, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016452845.724, "dur": 67.549, + "args": { + "External id": 2943305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016452936.591, "dur": 1.774, + "args": { + "External id": 2943306,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2377, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016453051.858, "dur": 1666.054, + "args": { + "External id": 2943307,"Sequence number": 29294606, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2378 + } + }, + { + "ph": "f", "id": 182, "pid": 1336755, "tid": 1381158, "ts": 1555016453051.858, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453178.772, "dur": 105.412, + "args": { + "External id": 2943308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016453323.045, "dur": 39.276, + "args": { + "External id": 2943309,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453382.471, "dur": 49.155, + "args": { + "External id": 2943310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453440.493, "dur": 32.373, + "args": { + "External id": 2943311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453479.694, "dur": 46.406, + "args": { + "External id": 2943312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453535.967, "dur": 28.241, + "args": { + "External id": 2943313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453571.940, "dur": 42.698, + "args": { + "External id": 2943314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016453636.353, "dur": 23.922, + "args": { + "External id": 2943315,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016453687.717, "dur": 31.363, + "args": { + "External id": 2943316,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016453739.560, "dur": 20.719, + "args": { + "External id": 2943317,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016453773.146, "dur": 16.048, + "args": { + "External id": 2943318,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453800.525, "dur": 29.639, + "args": { + "External id": 2943319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016453833.261, "dur": 38.608, + "args": { + "External id": 2943320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016453898.065, "dur": 221.503, + "args": { + "External id": 2943321,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016453976.882, "dur": 43.294, + "args": { + "External id": 2943322,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016454023.907, "dur": 5.059, + "args": { + "External id": 2943323,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016454167.979, "dur": 27.045, + "args": { + "External id": 2943324,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016454206.973, "dur": 15.731, + "args": { + "External id": 2943325,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016454232.946, "dur": 42.940, + "args": { + "External id": 2943326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016454283.226, "dur": 38.726, + "args": { + "External id": 2943327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016454329.068, "dur": 25.039, + "args": { + "External id": 2943328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016454362.298, "dur": 31.554, + "args": { + "External id": 2943329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016454404.284, "dur": 36.628, + "args": { + "External id": 2943330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016454454.882, "dur": 36.350, + "args": { + "External id": 2943331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016454513.266, "dur": 23.388, + "args": { + "External id": 2943332,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016454553.708, "dur": 28.600, + "args": { + "External id": 2943333,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016454597.196, "dur": 18.636, + "args": { + "External id": 2943334,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016454632.317, "dur": 16.340, + "args": { + "External id": 2943335,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016454669.505, "dur": 21.015, + "args": { + "External id": 2943336,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454765.320, "dur": 14.922, + "args": { + "External id": 2943337,"Record function id": 0, "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454769.063, "dur": 10.251, + "args": { + "External id": 2943338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454772.737, "dur": 5.688, + "args": { + "External id": 2943339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454774.438, "dur": 3.852, + "args": { + "External id": 2943340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454784.037, "dur": 5.398, + "args": { + "External id": 2943341,"Record function id": 0, "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454785.769, "dur": 3.220, + "args": { + "External id": 2943342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454786.611, "dur": 1.914, + "args": { + "External id": 2943343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454787.381, "dur": 1.022, + "args": { + "External id": 2943344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454792.625, "dur": 4.972, + "args": { + "External id": 2943345,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454794.203, "dur": 2.992, + "args": { + "External id": 2943346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454795.162, "dur": 1.579, + "args": { + "External id": 2943347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454795.588, "dur": 1.083, + "args": { + "External id": 2943348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454800.728, "dur": 5.953, + "args": { + "External id": 2943349,"Record function id": 0, "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454802.016, "dur": 4.266, + "args": { + "External id": 2943350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454802.521, "dur": 3.140, + "args": { + "External id": 2943351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454802.857, "dur": 2.736, + "args": { + "External id": 2943352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454809.809, "dur": 4.049, + "args": { + "External id": 2943353,"Record function id": 0, "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454811.057, "dur": 2.392, + "args": { + "External id": 2943354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454811.577, "dur": 1.249, + "args": { + "External id": 2943355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454811.899, "dur": 0.835, + "args": { + "External id": 2943356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454817.005, "dur": 4.388, + "args": { + "External id": 2943357,"Record function id": 0, "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454818.355, "dur": 2.580, + "args": { + "External id": 2943358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454819.014, "dur": 1.316, + "args": { + "External id": 2943359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454819.471, "dur": 0.710, + "args": { + "External id": 2943360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454824.582, "dur": 3.966, + "args": { + "External id": 2943361,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454826.124, "dur": 1.969, + "args": { + "External id": 2943362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454826.620, "dur": 1.072, + "args": { + "External id": 2943363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454826.943, "dur": 0.647, + "args": { + "External id": 2943364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454831.760, "dur": 3.886, + "args": { + "External id": 2943365,"Record function id": 0, "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454833.133, "dur": 2.084, + "args": { + "External id": 2943366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454833.756, "dur": 1.068, + "args": { + "External id": 2943367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454834.053, "dur": 0.676, + "args": { + "External id": 2943368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454838.706, "dur": 4.000, + "args": { + "External id": 2943369,"Record function id": 0, "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016454839.964, "dur": 2.285, + "args": { + "External id": 2943370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454840.445, "dur": 1.262, + "args": { + "External id": 2943371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016454840.891, "dur": 0.648, + "args": { + "External id": 2943372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016454846.390, "dur": 36999.030, + "args": { + "External id": 2943373,"Record function id": 0, "Sequence number": 29294605, "Fwd thread id": 1, "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016454847.939, "dur": 36989.104, + "args": { + "External id": 2943374,"Sequence number": 29294605, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2445 + } + }, + { + "ph": "f", "id": 183, "pid": 1336755, "tid": 1381158, "ts": 1555016454847.939, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 1336755, "tid": 1381158, + "ts": 1555016454875.969, "dur": 36.326, + "args": { + "External id": 2943375,"Record function id": 0, "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 1336755, "tid": 1381158, + "ts": 1555016454919.828, "dur": 106.113, + "args": { + "External id": 2943376,"Record function id": 0, "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 1336755, "tid": 1381158, + "ts": 1555016455034.547, "dur": 36794.529, + "args": { + "External id": 2943377,"Record function id": 0, "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016455124.422, "dur": 6.442, + "args": { + "External id": 2943378,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016455140.755, "dur": 18.049, + "args": { + "External id": 2943379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016455174.439, "dur": 35864.433, + "args": { + "External id": 2943380,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016455188.293, "dur": 35841.030, + "args": { + "External id": 2943381,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016455241.418, "dur": 15.234, + "args": { + "External id": 2943382,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016455262.950, "dur": 35701.450, + "args": { + "External id": 2943383,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016455265.635, "dur": 35698.220, + "args": { + "External id": 2943384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016455269.734, "dur": 4.933, + "args": { + "External id": 2943385,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016455276.552, "dur": 35683.656, + "args": { + "External id": 2943386,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016491128.429, "dur": 8.855, + "args": { + "External id": 2943387,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016491131.600, "dur": 5.354, + "args": { + "External id": 2943388,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016491179.015, "dur": 358.204, + "args": { + "External id": 2943389,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016491205.855, "dur": 326.328, + "args": { + "External id": 2943390,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2461, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016491218.473, "dur": 308.253, + "args": { + "External id": 2943391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016491556.408, "dur": 2.048, + "args": { + "External id": 2943392,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2463, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491615.003, "dur": 6.445, + "args": { + "External id": 2943393,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491665.560, "dur": 1.513, + "args": { + "External id": 2943394,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491684.015, "dur": 0.919, + "args": { + "External id": 2943395,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491700.668, "dur": 0.865, + "args": { + "External id": 2943396,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491713.342, "dur": 0.858, + "args": { + "External id": 2943397,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491725.886, "dur": 1.334, + "args": { + "External id": 2943398,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491739.479, "dur": 1.085, + "args": { + "External id": 2943399,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491754.995, "dur": 1.730, + "args": { + "External id": 2943400,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016491768.090, "dur": 0.935, + "args": { + "External id": 2943401,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016491859.806, "dur": 2834.324, + "args": { + "External id": 2943402,"Record function id": 0, "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 1336755, "tid": 1381158, + "ts": 1555016491878.808, "dur": 1047.665, + "args": { + "External id": 2943403,"Record function id": 0, "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 1336755, "tid": 1381158, + "ts": 1555016491895.498, "dur": 397.828, + "args": { + "External id": 2943404,"Record function id": 0, "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016491977.350, "dur": 42.227, + "args": { + "External id": 2943405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492025.629, "dur": 2.373, + "args": { + "External id": 2943406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492030.269, "dur": 1.018, + "args": { + "External id": 2943407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492033.112, "dur": 3.155, + "args": { + "External id": 2943408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492037.729, "dur": 1.027, + "args": { + "External id": 2943409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492041.140, "dur": 0.900, + "args": { + "External id": 2943410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492043.563, "dur": 1.756, + "args": { + "External id": 2943411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492046.939, "dur": 1.159, + "args": { + "External id": 2943412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492049.527, "dur": 1.077, + "args": { + "External id": 2943413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016492051.992, "dur": 1.049, + "args": { + "External id": 2943414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016492071.864, "dur": 184.006, + "args": { + "External id": 2943415,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016492102.968, "dur": 147.768, + "args": { + "External id": 2943416,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016492119.709, "dur": 12.325, + "args": { + "External id": 2943417,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016492135.398, "dur": 86.055, + "args": { + "External id": 2943418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016492138.023, "dur": 83.085, + "args": { + "External id": 2943419,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492156.567, "dur": 8.958, + "args": { + "External id": 2943420,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016492167.605, "dur": 52.956, + "args": { + "External id": 2943421,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 1336755, "tid": 1381158, + "ts": 1555016492388.416, "dur": 529.819, + "args": { + "External id": 2943422,"Record function id": 0, "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 1336755, "tid": 1381158, + "ts": 1555016492406.440, "dur": 499.989, + "args": { + "External id": 2943423,"Record function id": 0, "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016492471.352, "dur": 4.611, + "args": { + "External id": 2943424,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016492493.613, "dur": 32.471, + "args": { + "External id": 2943425,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492498.119, "dur": 1.595, + "args": { + "External id": 2943426,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492502.103, "dur": 0.470, + "args": { + "External id": 2943427,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492504.594, "dur": 0.765, + "args": { + "External id": 2943428,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492507.126, "dur": 0.644, + "args": { + "External id": 2943429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492509.672, "dur": 0.822, + "args": { + "External id": 2943430,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492512.519, "dur": 0.440, + "args": { + "External id": 2943431,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492514.712, "dur": 2.291, + "args": { + "External id": 2943432,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492518.567, "dur": 0.425, + "args": { + "External id": 2943433,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492520.991, "dur": 0.427, + "args": { + "External id": 2943434,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016492535.293, "dur": 30.060, + "args": { + "External id": 2943435,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016492594.907, "dur": 99.049, + "args": { + "External id": 2943436,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016492605.175, "dur": 3.184, + "args": { + "External id": 2943437,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016492614.016, "dur": 9.892, + "args": { + "External id": 2943438,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016492618.237, "dur": 5.279, + "args": { + "External id": 2943439,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492621.817, "dur": 0.551, + "args": { + "External id": 2943440,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016492631.473, "dur": 27.050, + "args": { + "External id": 2943441,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492633.716, "dur": 0.462, + "args": { + "External id": 2943442,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492635.835, "dur": 0.618, + "args": { + "External id": 2943443,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492638.306, "dur": 0.342, + "args": { + "External id": 2943444,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492640.419, "dur": 2.840, + "args": { + "External id": 2943445,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492645.086, "dur": 0.422, + "args": { + "External id": 2943446,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492647.293, "dur": 0.424, + "args": { + "External id": 2943447,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492649.623, "dur": 0.367, + "args": { + "External id": 2943448,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492651.495, "dur": 0.389, + "args": { + "External id": 2943449,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016492653.947, "dur": 0.377, + "args": { + "External id": 2943450,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016492668.339, "dur": 18.581, + "args": { + "External id": 2943451,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016492737.927, "dur": 107.872, + "args": { + "External id": 2943452,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016492760.242, "dur": 82.293, + "args": { + "External id": 2943453,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2524, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016492769.643, "dur": 68.970, + "args": { + "External id": 2943454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016492859.560, "dur": 1.629, + "args": { + "External id": 2943455,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2526, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016492934.400, "dur": 1730.531, + "args": { + "External id": 2943456,"Sequence number": 29294604, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2527 + } + }, + { + "ph": "f", "id": 184, "pid": 1336755, "tid": 1381158, "ts": 1555016492934.400, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493084.385, "dur": 119.659, + "args": { + "External id": 2943457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016493246.106, "dur": 42.413, + "args": { + "External id": 2943458,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493307.340, "dur": 55.777, + "args": { + "External id": 2943459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493373.029, "dur": 33.505, + "args": { + "External id": 2943460,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493416.046, "dur": 46.337, + "args": { + "External id": 2943461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493470.277, "dur": 28.680, + "args": { + "External id": 2943462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493506.294, "dur": 44.494, + "args": { + "External id": 2943463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016493573.551, "dur": 24.088, + "args": { + "External id": 2943464,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016493617.864, "dur": 30.614, + "args": { + "External id": 2943465,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016493667.673, "dur": 21.125, + "args": { + "External id": 2943466,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016493701.016, "dur": 15.470, + "args": { + "External id": 2943467,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493726.251, "dur": 29.821, + "args": { + "External id": 2943468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016493759.519, "dur": 34.858, + "args": { + "External id": 2943469,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016493822.334, "dur": 211.076, + "args": { + "External id": 2943470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016493901.950, "dur": 5.365, + "args": { + "External id": 2943471,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016493909.203, "dur": 2.026, + "args": { + "External id": 2943472,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016494097.158, "dur": 28.944, + "args": { + "External id": 2943473,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016494155.986, "dur": 20.408, + "args": { + "External id": 2943474,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016494187.943, "dur": 46.997, + "args": { + "External id": 2943475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016494242.047, "dur": 40.797, + "args": { + "External id": 2943476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016494293.802, "dur": 22.597, + "args": { + "External id": 2943477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016494322.142, "dur": 32.771, + "args": { + "External id": 2943478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016494361.136, "dur": 24.907, + "args": { + "External id": 2943479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016494393.864, "dur": 49.571, + "args": { + "External id": 2943480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016494468.224, "dur": 24.383, + "args": { + "External id": 2943481,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016494513.912, "dur": 24.316, + "args": { + "External id": 2943482,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016494552.445, "dur": 21.779, + "args": { + "External id": 2943483,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016494589.980, "dur": 16.011, + "args": { + "External id": 2943484,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016494620.462, "dur": 16.304, + "args": { + "External id": 2943485,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494716.111, "dur": 46.201, + "args": { + "External id": 2943486,"Record function id": 0, "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494719.612, "dur": 41.661, + "args": { + "External id": 2943487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494723.519, "dur": 36.822, + "args": { + "External id": 2943488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494755.953, "dur": 4.205, + "args": { + "External id": 2943489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494766.329, "dur": 5.119, + "args": { + "External id": 2943490,"Record function id": 0, "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494767.928, "dur": 3.061, + "args": { + "External id": 2943491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494768.619, "dur": 1.945, + "args": { + "External id": 2943492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494769.434, "dur": 1.011, + "args": { + "External id": 2943493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494774.603, "dur": 6.075, + "args": { + "External id": 2943494,"Record function id": 0, "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494776.047, "dur": 4.224, + "args": { + "External id": 2943495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494776.544, "dur": 3.189, + "args": { + "External id": 2943496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494776.938, "dur": 2.703, + "args": { + "External id": 2943497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494783.925, "dur": 4.032, + "args": { + "External id": 2943498,"Record function id": 0, "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494785.332, "dur": 2.181, + "args": { + "External id": 2943499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494785.869, "dur": 1.072, + "args": { + "External id": 2943500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494786.156, "dur": 0.702, + "args": { + "External id": 2943501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494793.735, "dur": 3.898, + "args": { + "External id": 2943502,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494794.917, "dur": 2.313, + "args": { + "External id": 2943503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494795.558, "dur": 1.111, + "args": { + "External id": 2943504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494795.844, "dur": 0.753, + "args": { + "External id": 2943505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494800.720, "dur": 4.071, + "args": { + "External id": 2943506,"Record function id": 0, "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494802.094, "dur": 2.294, + "args": { + "External id": 2943507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494802.621, "dur": 1.106, + "args": { + "External id": 2943508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494803.034, "dur": 0.622, + "args": { + "External id": 2943509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494809.032, "dur": 4.551, + "args": { + "External id": 2943510,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494810.727, "dur": 2.402, + "args": { + "External id": 2943511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494811.234, "dur": 1.158, + "args": { + "External id": 2943512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494811.505, "dur": 0.798, + "args": { + "External id": 2943513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494816.685, "dur": 5.722, + "args": { + "External id": 2943514,"Record function id": 0, "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494817.879, "dur": 4.097, + "args": { + "External id": 2943515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494818.368, "dur": 2.997, + "args": { + "External id": 2943516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494820.593, "dur": 0.662, + "args": { + "External id": 2943517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494825.595, "dur": 3.711, + "args": { + "External id": 2943518,"Record function id": 0, "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016494826.969, "dur": 1.924, + "args": { + "External id": 2943519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494827.567, "dur": 0.927, + "args": { + "External id": 2943520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016494827.854, "dur": 0.551, + "args": { + "External id": 2943521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016494832.810, "dur": 36814.424, + "args": { + "External id": 2943522,"Record function id": 0, "Sequence number": 29294603, "Fwd thread id": 1, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016494834.238, "dur": 36804.744, + "args": { + "External id": 2943523,"Sequence number": 29294603, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2594 + } + }, + { + "ph": "f", "id": 185, "pid": 1336755, "tid": 1381158, "ts": 1555016494834.238, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 1336755, "tid": 1381158, + "ts": 1555016494863.725, "dur": 35.699, + "args": { + "External id": 2943524,"Record function id": 0, "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 1336755, "tid": 1381158, + "ts": 1555016494906.946, "dur": 67.749, + "args": { + "External id": 2943525,"Record function id": 0, "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 1336755, "tid": 1381158, + "ts": 1555016494980.085, "dur": 36650.755, + "args": { + "External id": 2943526,"Record function id": 0, "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016495107.152, "dur": 7.158, + "args": { + "External id": 2943527,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016495124.449, "dur": 6.875, + "args": { + "External id": 2943528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016495165.058, "dur": 35576.284, + "args": { + "External id": 2943529,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016495178.871, "dur": 35554.110, + "args": { + "External id": 2943530,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016495225.146, "dur": 14.046, + "args": { + "External id": 2943531,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016495245.600, "dur": 35446.061, + "args": { + "External id": 2943532,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016495248.108, "dur": 35443.023, + "args": { + "External id": 2943533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016495252.367, "dur": 5.215, + "args": { + "External id": 2943534,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016495259.678, "dur": 35427.921, + "args": { + "External id": 2943535,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016530831.013, "dur": 8.902, + "args": { + "External id": 2943536,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016530834.042, "dur": 5.535, + "args": { + "External id": 2943537,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016530870.040, "dur": 464.404, + "args": { + "External id": 2943538,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016530895.431, "dur": 433.825, + "args": { + "External id": 2943539,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2610, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016530906.467, "dur": 416.940, + "args": { + "External id": 2943540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016531355.915, "dur": 2.665, + "args": { + "External id": 2943541,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2612, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531418.316, "dur": 6.185, + "args": { + "External id": 2943542,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531467.864, "dur": 1.350, + "args": { + "External id": 2943543,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531487.678, "dur": 3.363, + "args": { + "External id": 2943544,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531504.534, "dur": 1.253, + "args": { + "External id": 2943545,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531517.640, "dur": 1.106, + "args": { + "External id": 2943546,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531530.726, "dur": 1.277, + "args": { + "External id": 2943547,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531544.119, "dur": 2.865, + "args": { + "External id": 2943548,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531560.459, "dur": 1.765, + "args": { + "External id": 2943549,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531573.548, "dur": 0.718, + "args": { + "External id": 2943550,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016531661.675, "dur": 2807.063, + "args": { + "External id": 2943551,"Record function id": 0, "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 1336755, "tid": 1381158, + "ts": 1555016531680.145, "dur": 1063.802, + "args": { + "External id": 2943552,"Record function id": 0, "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 1336755, "tid": 1381158, + "ts": 1555016531694.748, "dur": 371.212, + "args": { + "External id": 2943553,"Record function id": 0, "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531776.973, "dur": 4.172, + "args": { + "External id": 2943554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531784.760, "dur": 1.345, + "args": { + "External id": 2943555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531787.945, "dur": 3.166, + "args": { + "External id": 2943556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531792.866, "dur": 0.984, + "args": { + "External id": 2943557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531795.396, "dur": 1.359, + "args": { + "External id": 2943558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531798.456, "dur": 0.876, + "args": { + "External id": 2943559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531800.963, "dur": 2.801, + "args": { + "External id": 2943560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531805.085, "dur": 1.164, + "args": { + "External id": 2943561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531807.853, "dur": 0.989, + "args": { + "External id": 2943562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016531810.415, "dur": 1.165, + "args": { + "External id": 2943563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016531828.863, "dur": 201.308, + "args": { + "External id": 2943564,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016531844.217, "dur": 180.366, + "args": { + "External id": 2943565,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016531864.539, "dur": 13.293, + "args": { + "External id": 2943566,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016531880.932, "dur": 80.783, + "args": { + "External id": 2943567,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016531883.203, "dur": 78.025, + "args": { + "External id": 2943568,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016531887.428, "dur": 5.564, + "args": { + "External id": 2943569,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016531894.680, "dur": 65.373, + "args": { + "External id": 2943570,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 1336755, "tid": 1381158, + "ts": 1555016532179.988, "dur": 556.252, + "args": { + "External id": 2943571,"Record function id": 0, "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 1336755, "tid": 1381158, + "ts": 1555016532198.921, "dur": 525.154, + "args": { + "External id": 2943572,"Record function id": 0, "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016532269.195, "dur": 5.790, + "args": { + "External id": 2943573,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016532290.376, "dur": 33.449, + "args": { + "External id": 2943574,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532295.952, "dur": 1.590, + "args": { + "External id": 2943575,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532300.004, "dur": 0.488, + "args": { + "External id": 2943576,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532302.450, "dur": 0.426, + "args": { + "External id": 2943577,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532304.507, "dur": 0.401, + "args": { + "External id": 2943578,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532306.440, "dur": 0.693, + "args": { + "External id": 2943579,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532308.929, "dur": 2.657, + "args": { + "External id": 2943580,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532313.306, "dur": 0.503, + "args": { + "External id": 2943581,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532315.473, "dur": 0.363, + "args": { + "External id": 2943582,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532317.367, "dur": 0.415, + "args": { + "External id": 2943583,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016532335.566, "dur": 34.499, + "args": { + "External id": 2943584,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016532402.851, "dur": 104.781, + "args": { + "External id": 2943585,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016532413.206, "dur": 2.844, + "args": { + "External id": 2943586,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016532421.625, "dur": 11.444, + "args": { + "External id": 2943587,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016532425.881, "dur": 6.775, + "args": { + "External id": 2943588,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532430.230, "dur": 1.221, + "args": { + "External id": 2943589,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016532440.035, "dur": 27.628, + "args": { + "External id": 2943590,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532442.282, "dur": 0.477, + "args": { + "External id": 2943591,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532444.680, "dur": 0.344, + "args": { + "External id": 2943592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532447.140, "dur": 2.371, + "args": { + "External id": 2943593,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532451.467, "dur": 0.466, + "args": { + "External id": 2943594,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532453.645, "dur": 0.545, + "args": { + "External id": 2943595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532455.842, "dur": 0.391, + "args": { + "External id": 2943596,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532458.275, "dur": 0.366, + "args": { + "External id": 2943597,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532460.095, "dur": 0.430, + "args": { + "External id": 2943598,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016532462.072, "dur": 0.582, + "args": { + "External id": 2943599,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016532480.139, "dur": 20.065, + "args": { + "External id": 2943600,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016532552.870, "dur": 108.589, + "args": { + "External id": 2943601,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016532574.747, "dur": 83.148, + "args": { + "External id": 2943602,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2673, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016532583.650, "dur": 70.087, + "args": { + "External id": 2943603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016532675.085, "dur": 1.854, + "args": { + "External id": 2943604,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2675, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016532750.841, "dur": 1697.351, + "args": { + "External id": 2943605,"Sequence number": 29294602, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2676 + } + }, + { + "ph": "f", "id": 186, "pid": 1336755, "tid": 1381158, "ts": 1555016532750.841, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016532858.412, "dur": 101.796, + "args": { + "External id": 2943606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016533044.810, "dur": 49.603, + "args": { + "External id": 2943607,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533114.946, "dur": 74.112, + "args": { + "External id": 2943608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533204.382, "dur": 36.918, + "args": { + "External id": 2943609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533249.390, "dur": 46.905, + "args": { + "External id": 2943610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533304.373, "dur": 27.969, + "args": { + "External id": 2943611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533340.917, "dur": 43.339, + "args": { + "External id": 2943612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016533409.714, "dur": 25.145, + "args": { + "External id": 2943613,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016533454.836, "dur": 31.073, + "args": { + "External id": 2943614,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016533503.995, "dur": 20.880, + "args": { + "External id": 2943615,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016533536.241, "dur": 16.977, + "args": { + "External id": 2943616,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533562.914, "dur": 30.810, + "args": { + "External id": 2943617,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533597.006, "dur": 35.614, + "args": { + "External id": 2943618,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016533659.493, "dur": 172.649, + "args": { + "External id": 2943619,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016533739.786, "dur": 5.928, + "args": { + "External id": 2943620,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016533747.756, "dur": 2.286, + "args": { + "External id": 2943621,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016533864.504, "dur": 24.077, + "args": { + "External id": 2943622,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016533900.162, "dur": 15.693, + "args": { + "External id": 2943623,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533923.889, "dur": 34.970, + "args": { + "External id": 2943624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016533965.846, "dur": 73.234, + "args": { + "External id": 2943625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016534051.896, "dur": 26.958, + "args": { + "External id": 2943626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016534085.316, "dur": 33.195, + "args": { + "External id": 2943627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016534125.863, "dur": 36.769, + "args": { + "External id": 2943628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016534173.589, "dur": 52.947, + "args": { + "External id": 2943629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016534254.250, "dur": 29.453, + "args": { + "External id": 2943630,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016534300.976, "dur": 26.131, + "args": { + "External id": 2943631,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016534341.365, "dur": 18.600, + "args": { + "External id": 2943632,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016534374.348, "dur": 15.852, + "args": { + "External id": 2943633,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016534401.216, "dur": 16.972, + "args": { + "External id": 2943634,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534495.021, "dur": 15.270, + "args": { + "External id": 2943635,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534498.454, "dur": 11.030, + "args": { + "External id": 2943636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534502.568, "dur": 6.095, + "args": { + "External id": 2943637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534504.242, "dur": 4.287, + "args": { + "External id": 2943638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534514.110, "dur": 8.021, + "args": { + "External id": 2943639,"Record function id": 0, "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534515.713, "dur": 5.936, + "args": { + "External id": 2943640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534516.586, "dur": 4.603, + "args": { + "External id": 2943641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534517.701, "dur": 3.382, + "args": { + "External id": 2943642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534525.422, "dur": 5.048, + "args": { + "External id": 2943643,"Record function id": 0, "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534526.890, "dur": 3.176, + "args": { + "External id": 2943644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534527.721, "dur": 1.792, + "args": { + "External id": 2943645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534528.421, "dur": 1.002, + "args": { + "External id": 2943646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534533.818, "dur": 4.399, + "args": { + "External id": 2943647,"Record function id": 0, "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534534.984, "dur": 2.806, + "args": { + "External id": 2943648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534535.754, "dur": 1.466, + "args": { + "External id": 2943649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534536.072, "dur": 1.080, + "args": { + "External id": 2943650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534541.280, "dur": 4.686, + "args": { + "External id": 2943651,"Record function id": 0, "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534543.094, "dur": 2.489, + "args": { + "External id": 2943652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534543.732, "dur": 1.464, + "args": { + "External id": 2943653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534544.407, "dur": 0.718, + "args": { + "External id": 2943654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534549.078, "dur": 4.189, + "args": { + "External id": 2943655,"Record function id": 0, "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534550.236, "dur": 2.640, + "args": { + "External id": 2943656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534550.929, "dur": 1.421, + "args": { + "External id": 2943657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534551.352, "dur": 0.872, + "args": { + "External id": 2943658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534556.554, "dur": 4.225, + "args": { + "External id": 2943659,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534557.768, "dur": 2.604, + "args": { + "External id": 2943660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534558.513, "dur": 1.468, + "args": { + "External id": 2943661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534558.972, "dur": 0.854, + "args": { + "External id": 2943662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534563.805, "dur": 3.623, + "args": { + "External id": 2943663,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534564.831, "dur": 2.201, + "args": { + "External id": 2943664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534565.425, "dur": 1.220, + "args": { + "External id": 2943665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534565.705, "dur": 0.836, + "args": { + "External id": 2943666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534570.948, "dur": 6.477, + "args": { + "External id": 2943667,"Record function id": 0, "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016534572.258, "dur": 4.761, + "args": { + "External id": 2943668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534572.759, "dur": 3.716, + "args": { + "External id": 2943669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016534573.558, "dur": 2.786, + "args": { + "External id": 2943670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016534581.552, "dur": 39860.525, + "args": { + "External id": 2943671,"Record function id": 0, "Sequence number": 29294601, "Fwd thread id": 1, "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016534586.270, "dur": 39847.054, + "args": { + "External id": 2943672,"Sequence number": 29294601, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2743 + } + }, + { + "ph": "f", "id": 187, "pid": 1336755, "tid": 1381158, "ts": 1555016534586.270, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 1336755, "tid": 1381158, + "ts": 1555016534612.525, "dur": 37.579, + "args": { + "External id": 2943673,"Record function id": 0, "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 1336755, "tid": 1381158, + "ts": 1555016534657.471, "dur": 63.981, + "args": { + "External id": 2943674,"Record function id": 0, "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 1336755, "tid": 1381158, + "ts": 1555016534727.448, "dur": 39698.289, + "args": { + "External id": 2943675,"Record function id": 0, "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016534807.593, "dur": 5.831, + "args": { + "External id": 2943676,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016534822.925, "dur": 4.817, + "args": { + "External id": 2943677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016534842.426, "dur": 38639.908, + "args": { + "External id": 2943678,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016534855.908, "dur": 38617.091, + "args": { + "External id": 2943679,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016534926.914, "dur": 13.264, + "args": { + "External id": 2943680,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016534946.326, "dur": 38485.742, + "args": { + "External id": 2943681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016534948.811, "dur": 38482.602, + "args": { + "External id": 2943682,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016534952.788, "dur": 4.536, + "args": { + "External id": 2943683,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016534959.193, "dur": 38468.499, + "args": { + "External id": 2943684,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016573575.164, "dur": 9.120, + "args": { + "External id": 2943685,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016573578.411, "dur": 5.511, + "args": { + "External id": 2943686,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016573618.297, "dur": 481.781, + "args": { + "External id": 2943687,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016573647.570, "dur": 446.486, + "args": { + "External id": 2943688,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2759, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016573659.275, "dur": 428.454, + "args": { + "External id": 2943689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016574123.007, "dur": 2.448, + "args": { + "External id": 2943690,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2761, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574203.175, "dur": 6.687, + "args": { + "External id": 2943691,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574255.173, "dur": 3.468, + "args": { + "External id": 2943692,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574276.178, "dur": 1.105, + "args": { + "External id": 2943693,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574291.051, "dur": 0.899, + "args": { + "External id": 2943694,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574305.390, "dur": 1.197, + "args": { + "External id": 2943695,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574318.346, "dur": 3.302, + "args": { + "External id": 2943696,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574334.694, "dur": 1.482, + "args": { + "External id": 2943697,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574349.336, "dur": 1.911, + "args": { + "External id": 2943698,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574362.939, "dur": 1.133, + "args": { + "External id": 2943699,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016574457.628, "dur": 2810.531, + "args": { + "External id": 2943700,"Record function id": 0, "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 1336755, "tid": 1381158, + "ts": 1555016574477.022, "dur": 1040.968, + "args": { + "External id": 2943701,"Record function id": 0, "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 1336755, "tid": 1381158, + "ts": 1555016574493.275, "dur": 320.483, + "args": { + "External id": 2943702,"Record function id": 0, "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574575.592, "dur": 5.871, + "args": { + "External id": 2943703,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574584.866, "dur": 1.154, + "args": { + "External id": 2943704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574587.810, "dur": 0.957, + "args": { + "External id": 2943705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574590.564, "dur": 0.964, + "args": { + "External id": 2943706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574592.939, "dur": 1.302, + "args": { + "External id": 2943707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574601.421, "dur": 1.050, + "args": { + "External id": 2943708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574604.072, "dur": 1.727, + "args": { + "External id": 2943709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574607.129, "dur": 1.051, + "args": { + "External id": 2943710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574609.872, "dur": 2.926, + "args": { + "External id": 2943711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016574614.300, "dur": 1.262, + "args": { + "External id": 2943712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016574632.753, "dur": 150.524, + "args": { + "External id": 2943713,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016574648.831, "dur": 129.942, + "args": { + "External id": 2943714,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016574666.115, "dur": 11.548, + "args": { + "External id": 2943715,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016574680.922, "dur": 66.063, + "args": { + "External id": 2943716,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016574683.644, "dur": 62.987, + "args": { + "External id": 2943717,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016574687.732, "dur": 5.645, + "args": { + "External id": 2943718,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016574695.275, "dur": 50.667, + "args": { + "External id": 2943719,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 1336755, "tid": 1381158, + "ts": 1555016574904.540, "dur": 605.103, + "args": { + "External id": 2943720,"Record function id": 0, "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 1336755, "tid": 1381158, + "ts": 1555016574920.629, "dur": 576.308, + "args": { + "External id": 2943721,"Record function id": 0, "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016575016.795, "dur": 6.972, + "args": { + "External id": 2943722,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016575041.086, "dur": 33.011, + "args": { + "External id": 2943723,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575046.339, "dur": 1.519, + "args": { + "External id": 2943724,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575050.170, "dur": 0.858, + "args": { + "External id": 2943725,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575052.759, "dur": 0.773, + "args": { + "External id": 2943726,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575055.639, "dur": 2.402, + "args": { + "External id": 2943727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575059.809, "dur": 0.433, + "args": { + "External id": 2943728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575061.895, "dur": 0.385, + "args": { + "External id": 2943729,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575064.268, "dur": 0.396, + "args": { + "External id": 2943730,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575066.588, "dur": 0.420, + "args": { + "External id": 2943731,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575068.490, "dur": 1.028, + "args": { + "External id": 2943732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016575084.582, "dur": 35.066, + "args": { + "External id": 2943733,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016575166.595, "dur": 111.167, + "args": { + "External id": 2943734,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016575177.526, "dur": 4.791, + "args": { + "External id": 2943735,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016575188.385, "dur": 10.535, + "args": { + "External id": 2943736,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016575192.363, "dur": 6.121, + "args": { + "External id": 2943737,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575196.291, "dur": 0.696, + "args": { + "External id": 2943738,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016575206.289, "dur": 29.987, + "args": { + "External id": 2943739,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575209.256, "dur": 2.737, + "args": { + "External id": 2943740,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575214.097, "dur": 0.459, + "args": { + "External id": 2943741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575216.650, "dur": 0.508, + "args": { + "External id": 2943742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575219.429, "dur": 0.327, + "args": { + "External id": 2943743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575221.463, "dur": 0.546, + "args": { + "External id": 2943744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575223.954, "dur": 0.347, + "args": { + "External id": 2943745,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575226.304, "dur": 0.361, + "args": { + "External id": 2943746,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575228.422, "dur": 0.341, + "args": { + "External id": 2943747,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016575230.430, "dur": 2.301, + "args": { + "External id": 2943748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016575247.170, "dur": 23.296, + "args": { + "External id": 2943749,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016575323.230, "dur": 110.723, + "args": { + "External id": 2943750,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016575345.127, "dur": 85.259, + "args": { + "External id": 2943751,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2822, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016575354.505, "dur": 71.600, + "args": { + "External id": 2943752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016575447.729, "dur": 1.760, + "args": { + "External id": 2943753,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2824, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016575525.893, "dur": 1720.316, + "args": { + "External id": 2943754,"Sequence number": 29294600, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2825 + } + }, + { + "ph": "f", "id": 188, "pid": 1336755, "tid": 1381158, "ts": 1555016575525.893, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016575632.586, "dur": 102.178, + "args": { + "External id": 2943755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016575771.723, "dur": 41.792, + "args": { + "External id": 2943756,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016575833.472, "dur": 52.329, + "args": { + "External id": 2943757,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016575895.240, "dur": 33.333, + "args": { + "External id": 2943758,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016575935.781, "dur": 87.352, + "args": { + "External id": 2943759,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576036.654, "dur": 35.270, + "args": { + "External id": 2943760,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576083.441, "dur": 44.868, + "args": { + "External id": 2943761,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016576170.286, "dur": 28.555, + "args": { + "External id": 2943762,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016576218.320, "dur": 31.550, + "args": { + "External id": 2943763,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016576273.121, "dur": 19.825, + "args": { + "External id": 2943764,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016576305.872, "dur": 16.479, + "args": { + "External id": 2943765,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576335.254, "dur": 37.593, + "args": { + "External id": 2943766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576376.448, "dur": 40.949, + "args": { + "External id": 2943767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016576447.976, "dur": 173.607, + "args": { + "External id": 2943768,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016576526.032, "dur": 6.553, + "args": { + "External id": 2943769,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016576534.678, "dur": 2.754, + "args": { + "External id": 2943770,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016576653.589, "dur": 25.245, + "args": { + "External id": 2943771,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016576691.088, "dur": 16.120, + "args": { + "External id": 2943772,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576715.691, "dur": 36.364, + "args": { + "External id": 2943773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576761.611, "dur": 37.225, + "args": { + "External id": 2943774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576806.782, "dur": 22.360, + "args": { + "External id": 2943775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576835.026, "dur": 30.926, + "args": { + "External id": 2943776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576873.025, "dur": 21.097, + "args": { + "External id": 2943777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016576905.672, "dur": 32.416, + "args": { + "External id": 2943778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016576973.571, "dur": 69.013, + "args": { + "External id": 2943779,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016577064.160, "dur": 25.920, + "args": { + "External id": 2943780,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016577105.842, "dur": 21.135, + "args": { + "External id": 2943781,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016577158.255, "dur": 19.807, + "args": { + "External id": 2943782,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016577196.652, "dur": 17.951, + "args": { + "External id": 2943783,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577289.982, "dur": 20.142, + "args": { + "External id": 2943784,"Record function id": 0, "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577293.238, "dur": 16.028, + "args": { + "External id": 2943785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577296.972, "dur": 11.277, + "args": { + "External id": 2943786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577302.080, "dur": 6.042, + "args": { + "External id": 2943787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577313.993, "dur": 6.069, + "args": { + "External id": 2943788,"Record function id": 0, "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577315.742, "dur": 3.858, + "args": { + "External id": 2943789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577317.003, "dur": 2.022, + "args": { + "External id": 2943790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577317.919, "dur": 0.972, + "args": { + "External id": 2943791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577323.880, "dur": 4.753, + "args": { + "External id": 2943792,"Record function id": 0, "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577325.429, "dur": 2.802, + "args": { + "External id": 2943793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577326.081, "dur": 1.719, + "args": { + "External id": 2943794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577326.593, "dur": 1.142, + "args": { + "External id": 2943795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577331.975, "dur": 3.909, + "args": { + "External id": 2943796,"Record function id": 0, "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577333.302, "dur": 2.173, + "args": { + "External id": 2943797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577333.840, "dur": 1.085, + "args": { + "External id": 2943798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577334.160, "dur": 0.682, + "args": { + "External id": 2943799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577339.184, "dur": 4.221, + "args": { + "External id": 2943800,"Record function id": 0, "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577340.663, "dur": 2.329, + "args": { + "External id": 2943801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577341.264, "dur": 1.141, + "args": { + "External id": 2943802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577341.523, "dur": 0.817, + "args": { + "External id": 2943803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577346.962, "dur": 4.111, + "args": { + "External id": 2943804,"Record function id": 0, "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577348.048, "dur": 2.632, + "args": { + "External id": 2943805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577348.943, "dur": 1.194, + "args": { + "External id": 2943806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577349.417, "dur": 0.626, + "args": { + "External id": 2943807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577354.662, "dur": 6.882, + "args": { + "External id": 2943808,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577355.867, "dur": 5.272, + "args": { + "External id": 2943809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577356.633, "dur": 4.099, + "args": { + "External id": 2943810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577356.922, "dur": 3.713, + "args": { + "External id": 2943811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577364.974, "dur": 6.495, + "args": { + "External id": 2943812,"Record function id": 0, "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577366.177, "dur": 4.843, + "args": { + "External id": 2943813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577366.970, "dur": 3.632, + "args": { + "External id": 2943814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577367.286, "dur": 3.186, + "args": { + "External id": 2943815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577374.730, "dur": 4.150, + "args": { + "External id": 2943816,"Record function id": 0, "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016577375.906, "dur": 2.560, + "args": { + "External id": 2943817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577376.663, "dur": 1.399, + "args": { + "External id": 2943818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016577377.175, "dur": 0.798, + "args": { + "External id": 2943819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016577382.887, "dur": 36226.682, + "args": { + "External id": 2943820,"Record function id": 0, "Sequence number": 29294599, "Fwd thread id": 1, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016577384.604, "dur": 36216.845, + "args": { + "External id": 2943821,"Sequence number": 29294599, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2892 + } + }, + { + "ph": "f", "id": 189, "pid": 1336755, "tid": 1381158, "ts": 1555016577384.604, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 1336755, "tid": 1381158, + "ts": 1555016577411.595, "dur": 38.985, + "args": { + "External id": 2943822,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 1336755, "tid": 1381158, + "ts": 1555016577457.780, "dur": 66.869, + "args": { + "External id": 2943823,"Record function id": 0, "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 1336755, "tid": 1381158, + "ts": 1555016577530.712, "dur": 36063.360, + "args": { + "External id": 2943824,"Record function id": 0, "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016577615.387, "dur": 6.533, + "args": { + "External id": 2943825,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016577631.060, "dur": 4.447, + "args": { + "External id": 2943826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016577650.176, "dur": 35086.055, + "args": { + "External id": 2943827,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016577663.344, "dur": 35063.718, + "args": { + "External id": 2943828,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016577744.033, "dur": 12.986, + "args": { + "External id": 2943829,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016577763.576, "dur": 34923.943, + "args": { + "External id": 2943830,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016577765.971, "dur": 34920.911, + "args": { + "External id": 2943831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016577770.349, "dur": 4.576, + "args": { + "External id": 2943832,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016577776.691, "dur": 34906.694, + "args": { + "External id": 2943833,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016612824.075, "dur": 8.706, + "args": { + "External id": 2943834,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016612827.153, "dur": 5.259, + "args": { + "External id": 2943835,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016612865.151, "dur": 420.957, + "args": { + "External id": 2943836,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016612891.867, "dur": 388.978, + "args": { + "External id": 2943837,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2908, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016612903.576, "dur": 371.511, + "args": { + "External id": 2943838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016613307.792, "dur": 2.409, + "args": { + "External id": 2943839,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2910, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613373.319, "dur": 6.363, + "args": { + "External id": 2943840,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613427.345, "dur": 1.252, + "args": { + "External id": 2943841,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613446.009, "dur": 1.201, + "args": { + "External id": 2943842,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613460.319, "dur": 0.844, + "args": { + "External id": 2943843,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613474.196, "dur": 1.030, + "args": { + "External id": 2943844,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613489.026, "dur": 1.122, + "args": { + "External id": 2943845,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613502.281, "dur": 1.080, + "args": { + "External id": 2943846,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613516.872, "dur": 1.856, + "args": { + "External id": 2943847,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613531.278, "dur": 0.858, + "args": { + "External id": 2943848,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016613624.746, "dur": 2865.934, + "args": { + "External id": 2943849,"Record function id": 0, "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 1336755, "tid": 1381158, + "ts": 1555016613643.952, "dur": 1056.432, + "args": { + "External id": 2943850,"Record function id": 0, "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 1336755, "tid": 1381158, + "ts": 1555016613658.420, "dur": 313.766, + "args": { + "External id": 2943851,"Record function id": 0, "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613739.572, "dur": 4.291, + "args": { + "External id": 2943852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613747.181, "dur": 1.323, + "args": { + "External id": 2943853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613750.806, "dur": 1.451, + "args": { + "External id": 2943854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613753.939, "dur": 0.971, + "args": { + "External id": 2943855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613756.411, "dur": 0.941, + "args": { + "External id": 2943856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613758.991, "dur": 1.174, + "args": { + "External id": 2943857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613761.756, "dur": 1.749, + "args": { + "External id": 2943858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613765.028, "dur": 3.237, + "args": { + "External id": 2943859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613769.746, "dur": 0.895, + "args": { + "External id": 2943860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016613772.401, "dur": 1.489, + "args": { + "External id": 2943861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016613793.404, "dur": 148.578, + "args": { + "External id": 2943862,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016613808.783, "dur": 128.808, + "args": { + "External id": 2943863,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016613830.423, "dur": 11.537, + "args": { + "External id": 2943864,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016613845.012, "dur": 64.052, + "args": { + "External id": 2943865,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016613847.608, "dur": 61.154, + "args": { + "External id": 2943866,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016613851.605, "dur": 5.460, + "args": { + "External id": 2943867,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016613859.026, "dur": 49.234, + "args": { + "External id": 2943868,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 1336755, "tid": 1381158, + "ts": 1555016614111.125, "dur": 581.007, + "args": { + "External id": 2943869,"Record function id": 0, "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 1336755, "tid": 1381158, + "ts": 1555016614129.687, "dur": 549.990, + "args": { + "External id": 2943870,"Record function id": 0, "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016614214.939, "dur": 5.761, + "args": { + "External id": 2943871,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016614236.512, "dur": 36.436, + "args": { + "External id": 2943872,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614241.644, "dur": 1.801, + "args": { + "External id": 2943873,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614245.614, "dur": 0.682, + "args": { + "External id": 2943874,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614248.140, "dur": 2.517, + "args": { + "External id": 2943875,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614252.343, "dur": 0.495, + "args": { + "External id": 2943876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614254.962, "dur": 0.354, + "args": { + "External id": 2943877,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614257.183, "dur": 0.334, + "args": { + "External id": 2943878,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614263.434, "dur": 0.521, + "args": { + "External id": 2943879,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614265.858, "dur": 0.323, + "args": { + "External id": 2943880,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614267.756, "dur": 0.445, + "args": { + "External id": 2943881,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016614283.006, "dur": 41.477, + "args": { + "External id": 2943882,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016614359.316, "dur": 104.862, + "args": { + "External id": 2943883,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016614369.731, "dur": 3.010, + "args": { + "External id": 2943884,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016614378.474, "dur": 12.020, + "args": { + "External id": 2943885,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016614382.553, "dur": 7.536, + "args": { + "External id": 2943886,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614386.441, "dur": 2.550, + "args": { + "External id": 2943887,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016614397.872, "dur": 25.071, + "args": { + "External id": 2943888,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614399.958, "dur": 0.728, + "args": { + "External id": 2943889,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614402.693, "dur": 0.610, + "args": { + "External id": 2943890,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614405.092, "dur": 0.387, + "args": { + "External id": 2943891,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614407.288, "dur": 0.422, + "args": { + "External id": 2943892,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614409.406, "dur": 0.499, + "args": { + "External id": 2943893,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614411.397, "dur": 0.359, + "args": { + "External id": 2943894,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614413.312, "dur": 0.320, + "args": { + "External id": 2943895,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614415.223, "dur": 2.258, + "args": { + "External id": 2943896,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016614419.159, "dur": 0.417, + "args": { + "External id": 2943897,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016614437.124, "dur": 19.645, + "args": { + "External id": 2943898,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016614508.135, "dur": 106.911, + "args": { + "External id": 2943899,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016614532.877, "dur": 78.881, + "args": { + "External id": 2943900,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2971, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016614542.665, "dur": 65.015, + "args": { + "External id": 2943901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016614630.454, "dur": 1.910, + "args": { + "External id": 2943902,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2973, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016614707.208, "dur": 1759.638, + "args": { + "External id": 2943903,"Sequence number": 29294598, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2974 + } + }, + { + "ph": "f", "id": 190, "pid": 1336755, "tid": 1381158, "ts": 1555016614707.208, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016614812.352, "dur": 101.318, + "args": { + "External id": 2943904,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016614950.173, "dur": 83.351, + "args": { + "External id": 2943905,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615057.581, "dur": 59.738, + "args": { + "External id": 2943906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615128.483, "dur": 51.961, + "args": { + "External id": 2943907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615191.006, "dur": 49.837, + "args": { + "External id": 2943908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615249.615, "dur": 29.683, + "args": { + "External id": 2943909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615289.402, "dur": 43.562, + "args": { + "External id": 2943910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016615359.259, "dur": 27.433, + "args": { + "External id": 2943911,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016615405.848, "dur": 31.111, + "args": { + "External id": 2943912,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016615458.826, "dur": 21.700, + "args": { + "External id": 2943913,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016615494.110, "dur": 18.248, + "args": { + "External id": 2943914,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615525.672, "dur": 32.084, + "args": { + "External id": 2943915,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615561.452, "dur": 35.857, + "args": { + "External id": 2943916,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016615626.038, "dur": 181.865, + "args": { + "External id": 2943917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016615703.717, "dur": 5.676, + "args": { + "External id": 2943918,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016615711.639, "dur": 2.510, + "args": { + "External id": 2943919,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016615841.409, "dur": 24.811, + "args": { + "External id": 2943920,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016615878.039, "dur": 15.775, + "args": { + "External id": 2943921,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615905.000, "dur": 38.512, + "args": { + "External id": 2943922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016615950.553, "dur": 77.870, + "args": { + "External id": 2943923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016616040.492, "dur": 28.735, + "args": { + "External id": 2943924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016616075.621, "dur": 36.737, + "args": { + "External id": 2943925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016616135.658, "dur": 50.650, + "args": { + "External id": 2943926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016616198.224, "dur": 37.014, + "args": { + "External id": 2943927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016616257.562, "dur": 25.771, + "args": { + "External id": 2943928,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016616304.568, "dur": 28.316, + "args": { + "External id": 2943929,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016616352.431, "dur": 18.372, + "args": { + "External id": 2943930,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016616386.448, "dur": 16.392, + "args": { + "External id": 2943931,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016616419.243, "dur": 17.292, + "args": { + "External id": 2943932,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616513.602, "dur": 14.520, + "args": { + "External id": 2943933,"Record function id": 0, "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616516.408, "dur": 10.879, + "args": { + "External id": 2943934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616520.477, "dur": 5.768, + "args": { + "External id": 2943935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616522.313, "dur": 3.808, + "args": { + "External id": 2943936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616532.258, "dur": 6.246, + "args": { + "External id": 2943937,"Record function id": 0, "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616533.729, "dur": 4.319, + "args": { + "External id": 2943938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616535.065, "dur": 2.410, + "args": { + "External id": 2943939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616536.195, "dur": 1.112, + "args": { + "External id": 2943940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616542.118, "dur": 5.577, + "args": { + "External id": 2943941,"Record function id": 0, "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616543.671, "dur": 3.608, + "args": { + "External id": 2943942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616544.926, "dur": 1.818, + "args": { + "External id": 2943943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616545.455, "dur": 1.180, + "args": { + "External id": 2943944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616551.192, "dur": 4.383, + "args": { + "External id": 2943945,"Record function id": 0, "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616552.559, "dur": 2.630, + "args": { + "External id": 2943946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616553.319, "dur": 1.456, + "args": { + "External id": 2943947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616553.842, "dur": 0.851, + "args": { + "External id": 2943948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616559.363, "dur": 4.020, + "args": { + "External id": 2943949,"Record function id": 0, "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616560.788, "dur": 2.171, + "args": { + "External id": 2943950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616561.357, "dur": 1.168, + "args": { + "External id": 2943951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616561.644, "dur": 0.791, + "args": { + "External id": 2943952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616566.744, "dur": 4.175, + "args": { + "External id": 2943953,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616567.933, "dur": 2.541, + "args": { + "External id": 2943954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616568.827, "dur": 1.223, + "args": { + "External id": 2943955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616569.271, "dur": 0.676, + "args": { + "External id": 2943956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616574.525, "dur": 5.761, + "args": { + "External id": 2943957,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616575.738, "dur": 4.104, + "args": { + "External id": 2943958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616576.579, "dur": 2.845, + "args": { + "External id": 2943959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616576.857, "dur": 2.487, + "args": { + "External id": 2943960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616583.617, "dur": 4.406, + "args": { + "External id": 2943961,"Record function id": 0, "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616584.875, "dur": 2.699, + "args": { + "External id": 2943962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616585.791, "dur": 1.355, + "args": { + "External id": 2943963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616586.252, "dur": 0.808, + "args": { + "External id": 2943964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616591.405, "dur": 4.491, + "args": { + "External id": 2943965,"Record function id": 0, "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016616592.397, "dur": 3.090, + "args": { + "External id": 2943966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616593.102, "dur": 1.667, + "args": { + "External id": 2943967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016616593.740, "dur": 0.877, + "args": { + "External id": 2943968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016616600.106, "dur": 36347.300, + "args": { + "External id": 2943969,"Record function id": 0, "Sequence number": 29294597, "Fwd thread id": 1, "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016616601.796, "dur": 36337.351, + "args": { + "External id": 2943970,"Sequence number": 29294597, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3041 + } + }, + { + "ph": "f", "id": 191, "pid": 1336755, "tid": 1381158, "ts": 1555016616601.796, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 1336755, "tid": 1381158, + "ts": 1555016616629.823, "dur": 37.773, + "args": { + "External id": 2943971,"Record function id": 0, "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 1336755, "tid": 1381158, + "ts": 1555016616675.739, "dur": 65.269, + "args": { + "External id": 2943972,"Record function id": 0, "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 1336755, "tid": 1381158, + "ts": 1555016616746.794, "dur": 36184.834, + "args": { + "External id": 2943973,"Record function id": 0, "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016616832.320, "dur": 6.187, + "args": { + "External id": 2943974,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016616847.417, "dur": 4.184, + "args": { + "External id": 2943975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016616865.468, "dur": 35233.722, + "args": { + "External id": 2943976,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016616912.011, "dur": 35178.149, + "args": { + "External id": 2943977,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016616960.055, "dur": 13.163, + "args": { + "External id": 2943978,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016616979.474, "dur": 35068.928, + "args": { + "External id": 2943979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016617024.138, "dur": 35023.592, + "args": { + "External id": 2943980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016617028.881, "dur": 6.910, + "args": { + "External id": 2943981,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016617038.042, "dur": 35006.492, + "args": { + "External id": 2943982,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016652200.371, "dur": 8.913, + "args": { + "External id": 2943983,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016652203.662, "dur": 5.176, + "args": { + "External id": 2943984,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016652240.646, "dur": 388.253, + "args": { + "External id": 2943985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016652267.563, "dur": 356.462, + "args": { + "External id": 2943986,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3057, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016652278.930, "dur": 339.267, + "args": { + "External id": 2943987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016652648.714, "dur": 2.498, + "args": { + "External id": 2943988,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3059, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652710.343, "dur": 8.130, + "args": { + "External id": 2943989,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652762.660, "dur": 1.515, + "args": { + "External id": 2943990,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652782.047, "dur": 0.993, + "args": { + "External id": 2943991,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652797.435, "dur": 1.003, + "args": { + "External id": 2943992,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652811.666, "dur": 2.332, + "args": { + "External id": 2943993,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652825.917, "dur": 1.062, + "args": { + "External id": 2943994,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652839.411, "dur": 0.956, + "args": { + "External id": 2943995,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652853.768, "dur": 2.138, + "args": { + "External id": 2943996,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016652868.539, "dur": 2.817, + "args": { + "External id": 2943997,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016652961.543, "dur": 2859.003, + "args": { + "External id": 2943998,"Record function id": 0, "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 1336755, "tid": 1381158, + "ts": 1555016652979.087, "dur": 1132.092, + "args": { + "External id": 2943999,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 1336755, "tid": 1381158, + "ts": 1555016653029.918, "dur": 369.859, + "args": { + "External id": 2944000,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653159.737, "dur": 5.463, + "args": { + "External id": 2944001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653170.543, "dur": 1.289, + "args": { + "External id": 2944002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653173.660, "dur": 1.020, + "args": { + "External id": 2944003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653176.217, "dur": 0.917, + "args": { + "External id": 2944004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653178.576, "dur": 1.119, + "args": { + "External id": 2944005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653182.878, "dur": 0.728, + "args": { + "External id": 2944006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653185.421, "dur": 4.100, + "args": { + "External id": 2944007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653191.013, "dur": 0.949, + "args": { + "External id": 2944008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653193.540, "dur": 0.853, + "args": { + "External id": 2944009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016653195.935, "dur": 0.838, + "args": { + "External id": 2944010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016653216.216, "dur": 149.136, + "args": { + "External id": 2944011,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016653233.571, "dur": 127.485, + "args": { + "External id": 2944012,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016653249.491, "dur": 12.258, + "args": { + "External id": 2944013,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016653264.724, "dur": 66.444, + "args": { + "External id": 2944014,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016653267.170, "dur": 63.595, + "args": { + "External id": 2944015,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653271.460, "dur": 5.665, + "args": { + "External id": 2944016,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016653279.025, "dur": 51.220, + "args": { + "External id": 2944017,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 1336755, "tid": 1381158, + "ts": 1555016653499.174, "dur": 603.032, + "args": { + "External id": 2944018,"Record function id": 0, "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 1336755, "tid": 1381158, + "ts": 1555016653516.282, "dur": 572.438, + "args": { + "External id": 2944019,"Record function id": 0, "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016653583.180, "dur": 4.673, + "args": { + "External id": 2944020,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016653603.715, "dur": 31.804, + "args": { + "External id": 2944021,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653608.936, "dur": 1.573, + "args": { + "External id": 2944022,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653612.743, "dur": 2.764, + "args": { + "External id": 2944023,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653617.520, "dur": 0.454, + "args": { + "External id": 2944024,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653619.655, "dur": 0.473, + "args": { + "External id": 2944025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653621.841, "dur": 0.381, + "args": { + "External id": 2944026,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653623.973, "dur": 0.444, + "args": { + "External id": 2944027,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653626.027, "dur": 0.420, + "args": { + "External id": 2944028,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653628.380, "dur": 0.561, + "args": { + "External id": 2944029,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653630.706, "dur": 0.361, + "args": { + "External id": 2944030,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016653645.298, "dur": 29.820, + "args": { + "External id": 2944031,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016653707.580, "dur": 108.175, + "args": { + "External id": 2944032,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016653717.146, "dur": 5.384, + "args": { + "External id": 2944033,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016653727.893, "dur": 9.978, + "args": { + "External id": 2944034,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016653731.859, "dur": 5.598, + "args": { + "External id": 2944035,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653735.671, "dur": 0.493, + "args": { + "External id": 2944036,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016653744.924, "dur": 26.037, + "args": { + "External id": 2944037,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653747.380, "dur": 0.497, + "args": { + "External id": 2944038,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653749.600, "dur": 0.413, + "args": { + "External id": 2944039,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653751.873, "dur": 0.394, + "args": { + "External id": 2944040,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653754.033, "dur": 0.329, + "args": { + "External id": 2944041,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653756.142, "dur": 0.342, + "args": { + "External id": 2944042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653758.120, "dur": 0.386, + "args": { + "External id": 2944043,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653760.146, "dur": 2.443, + "args": { + "External id": 2944044,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653764.439, "dur": 0.482, + "args": { + "External id": 2944045,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016653766.686, "dur": 0.369, + "args": { + "External id": 2944046,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016653787.338, "dur": 21.351, + "args": { + "External id": 2944047,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016653859.354, "dur": 111.585, + "args": { + "External id": 2944048,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016653884.609, "dur": 82.851, + "args": { + "External id": 2944049,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3120, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016653893.867, "dur": 68.931, + "args": { + "External id": 2944050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016654029.317, "dur": 3.278, + "args": { + "External id": 2944051,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3122, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016654119.779, "dur": 1678.364, + "args": { + "External id": 2944052,"Sequence number": 29294596, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3123 + } + }, + { + "ph": "f", "id": 192, "pid": 1336755, "tid": 1381158, "ts": 1555016654119.779, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654252.417, "dur": 104.461, + "args": { + "External id": 2944053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016654394.909, "dur": 41.614, + "args": { + "External id": 2944054,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654457.706, "dur": 51.408, + "args": { + "External id": 2944055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654518.713, "dur": 32.976, + "args": { + "External id": 2944056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654559.145, "dur": 46.198, + "args": { + "External id": 2944057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654615.928, "dur": 29.346, + "args": { + "External id": 2944058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654653.870, "dur": 42.904, + "args": { + "External id": 2944059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016654719.084, "dur": 23.075, + "args": { + "External id": 2944060,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016654760.372, "dur": 30.359, + "args": { + "External id": 2944061,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016654810.903, "dur": 19.940, + "args": { + "External id": 2944062,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016654844.478, "dur": 17.369, + "args": { + "External id": 2944063,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654873.446, "dur": 29.448, + "args": { + "External id": 2944064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016654906.644, "dur": 33.819, + "args": { + "External id": 2944065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016654966.256, "dur": 236.312, + "args": { + "External id": 2944066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016655084.441, "dur": 6.370, + "args": { + "External id": 2944067,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016655093.007, "dur": 5.222, + "args": { + "External id": 2944068,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016655238.132, "dur": 27.032, + "args": { + "External id": 2944069,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016655290.728, "dur": 17.621, + "args": { + "External id": 2944070,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016655318.985, "dur": 44.711, + "args": { + "External id": 2944071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016655371.103, "dur": 55.984, + "args": { + "External id": 2944072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016655440.131, "dur": 26.591, + "args": { + "External id": 2944073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016655474.802, "dur": 32.352, + "args": { + "External id": 2944074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016655513.103, "dur": 21.720, + "args": { + "External id": 2944075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016655543.103, "dur": 35.275, + "args": { + "External id": 2944076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016655597.044, "dur": 26.859, + "args": { + "External id": 2944077,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016655640.555, "dur": 24.974, + "args": { + "External id": 2944078,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016655683.362, "dur": 18.016, + "args": { + "External id": 2944079,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016655715.120, "dur": 20.363, + "args": { + "External id": 2944080,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016655749.428, "dur": 17.520, + "args": { + "External id": 2944081,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655842.873, "dur": 18.455, + "args": { + "External id": 2944082,"Record function id": 0, "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655846.472, "dur": 14.013, + "args": { + "External id": 2944083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655853.582, "dur": 6.136, + "args": { + "External id": 2944084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655855.406, "dur": 4.135, + "args": { + "External id": 2944085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655865.369, "dur": 5.374, + "args": { + "External id": 2944086,"Record function id": 0, "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655867.013, "dur": 3.272, + "args": { + "External id": 2944087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655867.712, "dur": 2.105, + "args": { + "External id": 2944088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655868.681, "dur": 1.017, + "args": { + "External id": 2944089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655874.030, "dur": 4.575, + "args": { + "External id": 2944090,"Record function id": 0, "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655875.300, "dur": 2.868, + "args": { + "External id": 2944091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655876.021, "dur": 1.728, + "args": { + "External id": 2944092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655876.476, "dur": 1.171, + "args": { + "External id": 2944093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655882.087, "dur": 6.115, + "args": { + "External id": 2944094,"Record function id": 0, "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655883.448, "dur": 4.324, + "args": { + "External id": 2944095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655884.012, "dur": 3.277, + "args": { + "External id": 2944096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655884.409, "dur": 2.785, + "args": { + "External id": 2944097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655891.443, "dur": 3.731, + "args": { + "External id": 2944098,"Record function id": 0, "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655892.573, "dur": 2.173, + "args": { + "External id": 2944099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655893.112, "dur": 1.136, + "args": { + "External id": 2944100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655893.440, "dur": 0.741, + "args": { + "External id": 2944101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655898.414, "dur": 4.239, + "args": { + "External id": 2944102,"Record function id": 0, "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655899.503, "dur": 2.653, + "args": { + "External id": 2944103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655900.045, "dur": 1.493, + "args": { + "External id": 2944104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655900.800, "dur": 0.586, + "args": { + "External id": 2944105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655909.121, "dur": 4.565, + "args": { + "External id": 2944106,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655910.444, "dur": 2.828, + "args": { + "External id": 2944107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655911.407, "dur": 1.299, + "args": { + "External id": 2944108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655912.067, "dur": 0.527, + "args": { + "External id": 2944109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655916.972, "dur": 3.744, + "args": { + "External id": 2944110,"Record function id": 0, "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655918.114, "dur": 2.186, + "args": { + "External id": 2944111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655918.801, "dur": 1.122, + "args": { + "External id": 2944112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655919.244, "dur": 0.608, + "args": { + "External id": 2944113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655923.934, "dur": 3.665, + "args": { + "External id": 2944114,"Record function id": 0, "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016655924.980, "dur": 2.161, + "args": { + "External id": 2944115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655925.475, "dur": 1.135, + "args": { + "External id": 2944116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016655925.905, "dur": 0.580, + "args": { + "External id": 2944117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016655931.408, "dur": 36517.958, + "args": { + "External id": 2944118,"Record function id": 0, "Sequence number": 29294595, "Fwd thread id": 1, "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016655932.804, "dur": 36507.884, + "args": { + "External id": 2944119,"Sequence number": 29294595, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3190 + } + }, + { + "ph": "f", "id": 193, "pid": 1336755, "tid": 1381158, "ts": 1555016655932.804, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 1336755, "tid": 1381158, + "ts": 1555016655960.619, "dur": 72.845, + "args": { + "External id": 2944120,"Record function id": 0, "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 1336755, "tid": 1381158, + "ts": 1555016656042.625, "dur": 68.450, + "args": { + "External id": 2944121,"Record function id": 0, "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 1336755, "tid": 1381158, + "ts": 1555016656116.487, "dur": 36316.560, + "args": { + "External id": 2944122,"Record function id": 0, "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016656217.987, "dur": 6.875, + "args": { + "External id": 2944123,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016656235.498, "dur": 4.882, + "args": { + "External id": 2944124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016656256.293, "dur": 35287.416, + "args": { + "External id": 2944125,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016656269.777, "dur": 35265.291, + "args": { + "External id": 2944126,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016656320.490, "dur": 14.424, + "args": { + "External id": 2944127,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016656341.055, "dur": 35153.805, + "args": { + "External id": 2944128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016656343.613, "dur": 35150.689, + "args": { + "External id": 2944129,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016656347.715, "dur": 4.616, + "args": { + "External id": 2944130,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016656354.044, "dur": 35137.012, + "args": { + "External id": 2944131,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016691631.365, "dur": 9.220, + "args": { + "External id": 2944132,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016691634.736, "dur": 5.522, + "args": { + "External id": 2944133,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016691668.776, "dur": 441.916, + "args": { + "External id": 2944134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016691694.143, "dur": 411.170, + "args": { + "External id": 2944135,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3206, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016691705.380, "dur": 393.301, + "args": { + "External id": 2944136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016692133.059, "dur": 2.309, + "args": { + "External id": 2944137,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3208, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692210.988, "dur": 6.374, + "args": { + "External id": 2944138,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692261.365, "dur": 1.388, + "args": { + "External id": 2944139,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692281.329, "dur": 0.927, + "args": { + "External id": 2944140,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692298.372, "dur": 1.007, + "args": { + "External id": 2944141,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692312.327, "dur": 1.093, + "args": { + "External id": 2944142,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692327.363, "dur": 1.019, + "args": { + "External id": 2944143,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692342.105, "dur": 0.883, + "args": { + "External id": 2944144,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692358.220, "dur": 1.915, + "args": { + "External id": 2944145,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692372.555, "dur": 0.806, + "args": { + "External id": 2944146,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016692464.587, "dur": 2824.932, + "args": { + "External id": 2944147,"Record function id": 0, "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 1336755, "tid": 1381158, + "ts": 1555016692483.310, "dur": 1033.980, + "args": { + "External id": 2944148,"Record function id": 0, "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 1336755, "tid": 1381158, + "ts": 1555016692499.256, "dur": 316.387, + "args": { + "External id": 2944149,"Record function id": 0, "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692581.119, "dur": 4.040, + "args": { + "External id": 2944150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692588.338, "dur": 1.251, + "args": { + "External id": 2944151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692591.347, "dur": 1.117, + "args": { + "External id": 2944152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692594.049, "dur": 2.930, + "args": { + "External id": 2944153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692598.594, "dur": 0.900, + "args": { + "External id": 2944154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692601.287, "dur": 1.070, + "args": { + "External id": 2944155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692604.137, "dur": 1.718, + "args": { + "External id": 2944156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692607.446, "dur": 1.343, + "args": { + "External id": 2944157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692610.514, "dur": 0.823, + "args": { + "External id": 2944158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016692613.057, "dur": 0.930, + "args": { + "External id": 2944159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016692631.815, "dur": 154.232, + "args": { + "External id": 2944160,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016692647.764, "dur": 133.775, + "args": { + "External id": 2944161,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016692670.048, "dur": 12.039, + "args": { + "External id": 2944162,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016692685.485, "dur": 67.451, + "args": { + "External id": 2944163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016692688.065, "dur": 64.548, + "args": { + "External id": 2944164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016692692.439, "dur": 6.758, + "args": { + "External id": 2944165,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016692701.020, "dur": 50.795, + "args": { + "External id": 2944166,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 1336755, "tid": 1381158, + "ts": 1555016692906.674, "dur": 602.358, + "args": { + "External id": 2944167,"Record function id": 0, "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 1336755, "tid": 1381158, + "ts": 1555016692923.691, "dur": 572.838, + "args": { + "External id": 2944168,"Record function id": 0, "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016693022.461, "dur": 7.196, + "args": { + "External id": 2944169,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016693045.711, "dur": 31.846, + "args": { + "External id": 2944170,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693050.757, "dur": 1.553, + "args": { + "External id": 2944171,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693054.970, "dur": 0.660, + "args": { + "External id": 2944172,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693057.678, "dur": 0.410, + "args": { + "External id": 2944173,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693059.922, "dur": 0.356, + "args": { + "External id": 2944174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693061.966, "dur": 0.631, + "args": { + "External id": 2944175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693064.406, "dur": 0.395, + "args": { + "External id": 2944176,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693066.425, "dur": 2.320, + "args": { + "External id": 2944177,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693070.399, "dur": 0.417, + "args": { + "External id": 2944178,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693072.659, "dur": 0.363, + "args": { + "External id": 2944179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016693088.164, "dur": 36.368, + "args": { + "External id": 2944180,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016693171.068, "dur": 110.669, + "args": { + "External id": 2944181,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016693182.421, "dur": 4.648, + "args": { + "External id": 2944182,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016693193.734, "dur": 10.523, + "args": { + "External id": 2944183,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016693197.702, "dur": 6.146, + "args": { + "External id": 2944184,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693201.628, "dur": 0.761, + "args": { + "External id": 2944185,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016693211.514, "dur": 26.406, + "args": { + "External id": 2944186,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693214.311, "dur": 0.473, + "args": { + "External id": 2944187,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693216.883, "dur": 0.506, + "args": { + "External id": 2944188,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693219.644, "dur": 0.356, + "args": { + "External id": 2944189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693221.983, "dur": 2.230, + "args": { + "External id": 2944190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693225.808, "dur": 0.504, + "args": { + "External id": 2944191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693228.075, "dur": 0.380, + "args": { + "External id": 2944192,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693230.161, "dur": 0.341, + "args": { + "External id": 2944193,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693232.093, "dur": 0.445, + "args": { + "External id": 2944194,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016693234.064, "dur": 0.404, + "args": { + "External id": 2944195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016693250.209, "dur": 23.997, + "args": { + "External id": 2944196,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016693327.496, "dur": 107.534, + "args": { + "External id": 2944197,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016693349.206, "dur": 82.155, + "args": { + "External id": 2944198,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3269, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016693359.442, "dur": 67.796, + "args": { + "External id": 2944199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016693448.451, "dur": 1.819, + "args": { + "External id": 2944200,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3271, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016693524.917, "dur": 1743.775, + "args": { + "External id": 2944201,"Sequence number": 29294594, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3272 + } + }, + { + "ph": "f", "id": 194, "pid": 1336755, "tid": 1381158, "ts": 1555016693524.917, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016693630.721, "dur": 101.893, + "args": { + "External id": 2944202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016693774.272, "dur": 39.980, + "args": { + "External id": 2944203,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016693834.842, "dur": 52.261, + "args": { + "External id": 2944204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016693896.711, "dur": 34.645, + "args": { + "External id": 2944205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016693941.051, "dur": 88.398, + "args": { + "External id": 2944206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694059.887, "dur": 35.888, + "args": { + "External id": 2944207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694104.307, "dur": 59.593, + "args": { + "External id": 2944208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016694192.907, "dur": 27.961, + "args": { + "External id": 2944209,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016694243.686, "dur": 30.785, + "args": { + "External id": 2944210,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016694295.783, "dur": 21.437, + "args": { + "External id": 2944211,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016694332.124, "dur": 17.887, + "args": { + "External id": 2944212,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694361.048, "dur": 35.609, + "args": { + "External id": 2944213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694400.242, "dur": 35.381, + "args": { + "External id": 2944214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016694464.999, "dur": 177.310, + "args": { + "External id": 2944215,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016694544.905, "dur": 5.864, + "args": { + "External id": 2944216,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016694552.838, "dur": 2.178, + "args": { + "External id": 2944217,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016694675.423, "dur": 27.470, + "args": { + "External id": 2944218,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016694714.368, "dur": 15.763, + "args": { + "External id": 2944219,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694741.551, "dur": 34.781, + "args": { + "External id": 2944220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694783.377, "dur": 37.143, + "args": { + "External id": 2944221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694828.844, "dur": 23.747, + "args": { + "External id": 2944222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694858.676, "dur": 50.851, + "args": { + "External id": 2944223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694922.355, "dur": 23.000, + "args": { + "External id": 2944224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016694953.544, "dur": 72.616, + "args": { + "External id": 2944225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016695048.780, "dur": 25.141, + "args": { + "External id": 2944226,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016695094.783, "dur": 27.121, + "args": { + "External id": 2944227,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016695137.484, "dur": 33.098, + "args": { + "External id": 2944228,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016695192.665, "dur": 17.064, + "args": { + "External id": 2944229,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016695222.506, "dur": 18.211, + "args": { + "External id": 2944230,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695311.177, "dur": 15.298, + "args": { + "External id": 2944231,"Record function id": 0, "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695314.806, "dur": 10.722, + "args": { + "External id": 2944232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695319.145, "dur": 5.571, + "args": { + "External id": 2944233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695320.972, "dur": 3.617, + "args": { + "External id": 2944234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695330.770, "dur": 6.081, + "args": { + "External id": 2944235,"Record function id": 0, "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695332.419, "dur": 3.956, + "args": { + "External id": 2944236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695333.582, "dur": 2.316, + "args": { + "External id": 2944237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695334.699, "dur": 1.088, + "args": { + "External id": 2944238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695340.362, "dur": 7.163, + "args": { + "External id": 2944239,"Record function id": 0, "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695341.821, "dur": 5.297, + "args": { + "External id": 2944240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695342.832, "dur": 3.844, + "args": { + "External id": 2944241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695343.356, "dur": 3.196, + "args": { + "External id": 2944242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695351.032, "dur": 4.977, + "args": { + "External id": 2944243,"Record function id": 0, "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695352.732, "dur": 2.864, + "args": { + "External id": 2944244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695353.671, "dur": 1.372, + "args": { + "External id": 2944245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695354.134, "dur": 0.801, + "args": { + "External id": 2944246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695359.422, "dur": 4.041, + "args": { + "External id": 2944247,"Record function id": 0, "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695360.750, "dur": 2.303, + "args": { + "External id": 2944248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695361.260, "dur": 1.236, + "args": { + "External id": 2944249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695361.767, "dur": 0.613, + "args": { + "External id": 2944250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695366.693, "dur": 4.839, + "args": { + "External id": 2944251,"Record function id": 0, "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695368.071, "dur": 3.049, + "args": { + "External id": 2944252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695369.063, "dur": 1.664, + "args": { + "External id": 2944253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695369.733, "dur": 0.847, + "args": { + "External id": 2944254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695374.787, "dur": 3.820, + "args": { + "External id": 2944255,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695375.990, "dur": 2.209, + "args": { + "External id": 2944256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695376.710, "dur": 1.096, + "args": { + "External id": 2944257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695376.995, "dur": 0.727, + "args": { + "External id": 2944258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695381.688, "dur": 4.471, + "args": { + "External id": 2944259,"Record function id": 0, "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695382.992, "dur": 2.763, + "args": { + "External id": 2944260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695383.789, "dur": 1.423, + "args": { + "External id": 2944261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695384.307, "dur": 0.788, + "args": { + "External id": 2944262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695389.475, "dur": 4.879, + "args": { + "External id": 2944263,"Record function id": 0, "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016695390.798, "dur": 3.142, + "args": { + "External id": 2944264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695391.694, "dur": 1.773, + "args": { + "External id": 2944265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016695392.438, "dur": 0.936, + "args": { + "External id": 2944266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016695398.414, "dur": 37027.728, + "args": { + "External id": 2944267,"Record function id": 0, "Sequence number": 29294593, "Fwd thread id": 1, "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016695399.928, "dur": 37017.207, + "args": { + "External id": 2944268,"Sequence number": 29294593, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3339 + } + }, + { + "ph": "f", "id": 195, "pid": 1336755, "tid": 1381158, "ts": 1555016695399.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 1336755, "tid": 1381158, + "ts": 1555016695429.585, "dur": 39.757, + "args": { + "External id": 2944269,"Record function id": 0, "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 1336755, "tid": 1381158, + "ts": 1555016695477.013, "dur": 69.139, + "args": { + "External id": 2944270,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 1336755, "tid": 1381158, + "ts": 1555016695552.159, "dur": 36856.912, + "args": { + "External id": 2944271,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016695638.002, "dur": 6.190, + "args": { + "External id": 2944272,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016695653.783, "dur": 6.453, + "args": { + "External id": 2944273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016695674.628, "dur": 35836.170, + "args": { + "External id": 2944274,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016695687.249, "dur": 35814.721, + "args": { + "External id": 2944275,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016695746.200, "dur": 13.967, + "args": { + "External id": 2944276,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016695766.629, "dur": 35696.208, + "args": { + "External id": 2944277,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016695769.288, "dur": 35692.951, + "args": { + "External id": 2944278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016695773.520, "dur": 4.386, + "args": { + "External id": 2944279,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016695779.791, "dur": 35679.129, + "args": { + "External id": 2944280,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016731595.115, "dur": 8.736, + "args": { + "External id": 2944281,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016731598.211, "dur": 5.322, + "args": { + "External id": 2944282,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016731632.237, "dur": 450.730, + "args": { + "External id": 2944283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016731658.375, "dur": 419.018, + "args": { + "External id": 2944284,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3355, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016731669.573, "dur": 401.731, + "args": { + "External id": 2944285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016732113.620, "dur": 2.194, + "args": { + "External id": 2944286,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3357, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732193.256, "dur": 6.680, + "args": { + "External id": 2944287,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732244.226, "dur": 1.428, + "args": { + "External id": 2944288,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732263.090, "dur": 2.875, + "args": { + "External id": 2944289,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732279.378, "dur": 0.894, + "args": { + "External id": 2944290,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732292.596, "dur": 0.922, + "args": { + "External id": 2944291,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732305.911, "dur": 0.954, + "args": { + "External id": 2944292,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732319.558, "dur": 2.629, + "args": { + "External id": 2944293,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732334.588, "dur": 1.886, + "args": { + "External id": 2944294,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732348.307, "dur": 0.882, + "args": { + "External id": 2944295,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016732441.797, "dur": 2833.770, + "args": { + "External id": 2944296,"Record function id": 0, "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 1336755, "tid": 1381158, + "ts": 1555016732460.041, "dur": 1046.084, + "args": { + "External id": 2944297,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 1336755, "tid": 1381158, + "ts": 1555016732473.931, "dur": 320.686, + "args": { + "External id": 2944298,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732561.001, "dur": 3.956, + "args": { + "External id": 2944299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732568.288, "dur": 1.546, + "args": { + "External id": 2944300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732571.787, "dur": 3.396, + "args": { + "External id": 2944301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732576.885, "dur": 0.952, + "args": { + "External id": 2944302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732579.329, "dur": 0.864, + "args": { + "External id": 2944303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732581.782, "dur": 1.122, + "args": { + "External id": 2944304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732584.560, "dur": 1.803, + "args": { + "External id": 2944305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732587.698, "dur": 0.815, + "args": { + "External id": 2944306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732590.012, "dur": 0.825, + "args": { + "External id": 2944307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016732592.391, "dur": 1.141, + "args": { + "External id": 2944308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016732611.110, "dur": 154.019, + "args": { + "External id": 2944309,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016732625.753, "dur": 134.399, + "args": { + "External id": 2944310,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016732646.420, "dur": 14.267, + "args": { + "External id": 2944311,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016732663.997, "dur": 66.110, + "args": { + "External id": 2944312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016732666.616, "dur": 63.174, + "args": { + "External id": 2944313,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016732670.562, "dur": 5.785, + "args": { + "External id": 2944314,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016732678.938, "dur": 50.052, + "args": { + "External id": 2944315,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 1336755, "tid": 1381158, + "ts": 1555016732887.502, "dur": 610.199, + "args": { + "External id": 2944316,"Record function id": 0, "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 1336755, "tid": 1381158, + "ts": 1555016732902.767, "dur": 582.945, + "args": { + "External id": 2944317,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016732964.092, "dur": 4.972, + "args": { + "External id": 2944318,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016733022.561, "dur": 31.479, + "args": { + "External id": 2944319,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733027.401, "dur": 1.629, + "args": { + "External id": 2944320,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733031.719, "dur": 0.574, + "args": { + "External id": 2944321,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733033.991, "dur": 0.700, + "args": { + "External id": 2944322,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733036.221, "dur": 0.654, + "args": { + "External id": 2944323,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733038.605, "dur": 0.409, + "args": { + "External id": 2944324,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733040.795, "dur": 2.324, + "args": { + "External id": 2944325,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733044.926, "dur": 0.634, + "args": { + "External id": 2944326,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733047.267, "dur": 0.453, + "args": { + "External id": 2944327,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733049.234, "dur": 0.532, + "args": { + "External id": 2944328,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016733065.074, "dur": 35.509, + "args": { + "External id": 2944329,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016733134.405, "dur": 126.188, + "args": { + "External id": 2944330,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016733160.485, "dur": 5.153, + "args": { + "External id": 2944331,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016733171.771, "dur": 10.987, + "args": { + "External id": 2944332,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016733175.805, "dur": 6.553, + "args": { + "External id": 2944333,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733180.074, "dur": 0.788, + "args": { + "External id": 2944334,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016733191.194, "dur": 26.866, + "args": { + "External id": 2944335,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733194.036, "dur": 0.465, + "args": { + "External id": 2944336,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733196.326, "dur": 0.448, + "args": { + "External id": 2944337,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733198.800, "dur": 2.496, + "args": { + "External id": 2944338,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733202.893, "dur": 0.384, + "args": { + "External id": 2944339,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733205.268, "dur": 0.488, + "args": { + "External id": 2944340,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733207.648, "dur": 0.404, + "args": { + "External id": 2944341,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733210.153, "dur": 0.410, + "args": { + "External id": 2944342,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733212.305, "dur": 0.374, + "args": { + "External id": 2944343,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016733214.240, "dur": 0.333, + "args": { + "External id": 2944344,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016733229.485, "dur": 23.482, + "args": { + "External id": 2944345,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016733308.786, "dur": 115.459, + "args": { + "External id": 2944346,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016733334.551, "dur": 85.907, + "args": { + "External id": 2944347,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3418, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016733344.493, "dur": 71.921, + "args": { + "External id": 2944348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016733436.658, "dur": 1.703, + "args": { + "External id": 2944349,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3420, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016733513.256, "dur": 1735.817, + "args": { + "External id": 2944350,"Sequence number": 29294592, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3421 + } + }, + { + "ph": "f", "id": 196, "pid": 1336755, "tid": 1381158, "ts": 1555016733513.256, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016733619.595, "dur": 100.780, + "args": { + "External id": 2944351,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016733756.249, "dur": 40.989, + "args": { + "External id": 2944352,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016733815.573, "dur": 51.299, + "args": { + "External id": 2944353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016733878.268, "dur": 34.660, + "args": { + "External id": 2944354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016733920.179, "dur": 47.152, + "args": { + "External id": 2944355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016733975.250, "dur": 80.691, + "args": { + "External id": 2944356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734067.416, "dur": 46.147, + "args": { + "External id": 2944357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016734140.191, "dur": 42.623, + "args": { + "External id": 2944358,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016734204.979, "dur": 33.648, + "args": { + "External id": 2944359,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016734260.534, "dur": 19.708, + "args": { + "External id": 2944360,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016734293.292, "dur": 16.184, + "args": { + "External id": 2944361,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734318.767, "dur": 34.444, + "args": { + "External id": 2944362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734356.941, "dur": 34.631, + "args": { + "External id": 2944363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016734439.784, "dur": 170.017, + "args": { + "External id": 2944364,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016734515.188, "dur": 5.417, + "args": { + "External id": 2944365,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016734522.697, "dur": 2.461, + "args": { + "External id": 2944366,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016734643.648, "dur": 24.489, + "args": { + "External id": 2944367,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016734679.236, "dur": 15.679, + "args": { + "External id": 2944368,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734703.797, "dur": 37.667, + "args": { + "External id": 2944369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734748.967, "dur": 36.756, + "args": { + "External id": 2944370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734795.513, "dur": 23.098, + "args": { + "External id": 2944371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734824.499, "dur": 49.419, + "args": { + "External id": 2944372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734886.390, "dur": 27.480, + "args": { + "External id": 2944373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016734925.673, "dur": 33.035, + "args": { + "External id": 2944374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016735018.597, "dur": 31.609, + "args": { + "External id": 2944375,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016735069.294, "dur": 28.143, + "args": { + "External id": 2944376,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016735114.162, "dur": 18.436, + "args": { + "External id": 2944377,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016735166.452, "dur": 19.200, + "args": { + "External id": 2944378,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016735199.952, "dur": 21.065, + "args": { + "External id": 2944379,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735296.948, "dur": 15.403, + "args": { + "External id": 2944380,"Record function id": 0, "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735300.786, "dur": 10.555, + "args": { + "External id": 2944381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735304.888, "dur": 5.509, + "args": { + "External id": 2944382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735306.589, "dur": 3.663, + "args": { + "External id": 2944383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735316.360, "dur": 7.826, + "args": { + "External id": 2944384,"Record function id": 0, "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735317.924, "dur": 5.833, + "args": { + "External id": 2944385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735318.789, "dur": 4.471, + "args": { + "External id": 2944386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735319.980, "dur": 3.111, + "args": { + "External id": 2944387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735327.475, "dur": 4.597, + "args": { + "External id": 2944388,"Record function id": 0, "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735328.959, "dur": 2.703, + "args": { + "External id": 2944389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735329.479, "dur": 1.754, + "args": { + "External id": 2944390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735330.098, "dur": 1.001, + "args": { + "External id": 2944391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735335.218, "dur": 3.939, + "args": { + "External id": 2944392,"Record function id": 0, "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735336.606, "dur": 2.130, + "args": { + "External id": 2944393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735337.189, "dur": 1.148, + "args": { + "External id": 2944394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735337.484, "dur": 0.761, + "args": { + "External id": 2944395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735342.215, "dur": 4.096, + "args": { + "External id": 2944396,"Record function id": 0, "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735343.390, "dur": 2.503, + "args": { + "External id": 2944397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735344.192, "dur": 1.278, + "args": { + "External id": 2944398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735344.761, "dur": 0.574, + "args": { + "External id": 2944399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735349.402, "dur": 4.484, + "args": { + "External id": 2944400,"Record function id": 0, "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735350.853, "dur": 2.589, + "args": { + "External id": 2944401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735351.368, "dur": 1.640, + "args": { + "External id": 2944402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735352.176, "dur": 0.693, + "args": { + "External id": 2944403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735357.042, "dur": 3.838, + "args": { + "External id": 2944404,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735358.289, "dur": 2.167, + "args": { + "External id": 2944405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735358.830, "dur": 1.219, + "args": { + "External id": 2944406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735359.282, "dur": 0.637, + "args": { + "External id": 2944407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735363.942, "dur": 4.093, + "args": { + "External id": 2944408,"Record function id": 0, "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735365.319, "dur": 2.296, + "args": { + "External id": 2944409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735366.115, "dur": 1.098, + "args": { + "External id": 2944410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735366.384, "dur": 0.692, + "args": { + "External id": 2944411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735371.063, "dur": 5.470, + "args": { + "External id": 2944412,"Record function id": 0, "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016735372.144, "dur": 3.952, + "args": { + "External id": 2944413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735372.840, "dur": 2.858, + "args": { + "External id": 2944414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016735373.233, "dur": 2.342, + "args": { + "External id": 2944415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016735380.227, "dur": 37062.541, + "args": { + "External id": 2944416,"Record function id": 0, "Sequence number": 29294591, "Fwd thread id": 1, "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016735381.929, "dur": 37052.427, + "args": { + "External id": 2944417,"Sequence number": 29294591, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3488 + } + }, + { + "ph": "f", "id": 197, "pid": 1336755, "tid": 1381158, "ts": 1555016735381.929, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 1336755, "tid": 1381158, + "ts": 1555016735408.097, "dur": 36.155, + "args": { + "External id": 2944418,"Record function id": 0, "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 1336755, "tid": 1381158, + "ts": 1555016735451.783, "dur": 63.830, + "args": { + "External id": 2944419,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 1336755, "tid": 1381158, + "ts": 1555016735520.889, "dur": 36906.269, + "args": { + "External id": 2944420,"Record function id": 0, "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016735604.948, "dur": 6.392, + "args": { + "External id": 2944421,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016735620.130, "dur": 4.522, + "args": { + "External id": 2944422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016735638.414, "dur": 35829.011, + "args": { + "External id": 2944423,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016735651.701, "dur": 35806.955, + "args": { + "External id": 2944424,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016735705.205, "dur": 12.798, + "args": { + "External id": 2944425,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016735724.222, "dur": 35692.805, + "args": { + "External id": 2944426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016735726.997, "dur": 35689.225, + "args": { + "External id": 2944427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016735730.987, "dur": 5.436, + "args": { + "External id": 2944428,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016735738.260, "dur": 35674.261, + "args": { + "External id": 2944429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016771557.691, "dur": 9.585, + "args": { + "External id": 2944430,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016771560.938, "dur": 6.021, + "args": { + "External id": 2944431,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016771596.188, "dur": 518.188, + "args": { + "External id": 2944432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016771621.527, "dur": 486.999, + "args": { + "External id": 2944433,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3504, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016771632.107, "dur": 470.284, + "args": { + "External id": 2944434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016772136.635, "dur": 2.567, + "args": { + "External id": 2944435,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3506, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772218.531, "dur": 6.611, + "args": { + "External id": 2944436,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772267.740, "dur": 3.522, + "args": { + "External id": 2944437,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772288.187, "dur": 0.886, + "args": { + "External id": 2944438,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772301.239, "dur": 0.852, + "args": { + "External id": 2944439,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772313.744, "dur": 0.977, + "args": { + "External id": 2944440,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772326.429, "dur": 3.027, + "args": { + "External id": 2944441,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772341.844, "dur": 0.963, + "args": { + "External id": 2944442,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772355.263, "dur": 1.876, + "args": { + "External id": 2944443,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772368.508, "dur": 0.760, + "args": { + "External id": 2944444,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016772458.229, "dur": 2846.948, + "args": { + "External id": 2944445,"Record function id": 0, "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 1336755, "tid": 1381158, + "ts": 1555016772476.501, "dur": 1079.229, + "args": { + "External id": 2944446,"Record function id": 0, "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 1336755, "tid": 1381158, + "ts": 1555016772491.197, "dur": 343.444, + "args": { + "External id": 2944447,"Record function id": 0, "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772573.949, "dur": 5.628, + "args": { + "External id": 2944448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772582.668, "dur": 1.344, + "args": { + "External id": 2944449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772585.848, "dur": 1.207, + "args": { + "External id": 2944450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772589.041, "dur": 1.001, + "args": { + "External id": 2944451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772591.432, "dur": 1.062, + "args": { + "External id": 2944452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772594.372, "dur": 0.896, + "args": { + "External id": 2944453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772596.928, "dur": 1.595, + "args": { + "External id": 2944454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772599.769, "dur": 1.227, + "args": { + "External id": 2944455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772602.373, "dur": 4.448, + "args": { + "External id": 2944456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016772608.666, "dur": 0.914, + "args": { + "External id": 2944457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016772627.827, "dur": 172.457, + "args": { + "External id": 2944458,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016772643.805, "dur": 151.754, + "args": { + "External id": 2944459,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016772668.446, "dur": 11.893, + "args": { + "External id": 2944460,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016772683.620, "dur": 81.457, + "args": { + "External id": 2944461,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016772686.309, "dur": 78.366, + "args": { + "External id": 2944462,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016772690.264, "dur": 5.671, + "args": { + "External id": 2944463,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016772697.835, "dur": 66.038, + "args": { + "External id": 2944464,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 1336755, "tid": 1381158, + "ts": 1555016772928.470, "dur": 618.812, + "args": { + "External id": 2944465,"Record function id": 0, "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 1336755, "tid": 1381158, + "ts": 1555016772945.511, "dur": 589.233, + "args": { + "External id": 2944466,"Record function id": 0, "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016773047.830, "dur": 6.436, + "args": { + "External id": 2944467,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016773069.443, "dur": 30.914, + "args": { + "External id": 2944468,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773074.361, "dur": 1.416, + "args": { + "External id": 2944469,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773077.926, "dur": 0.686, + "args": { + "External id": 2944470,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773080.535, "dur": 0.398, + "args": { + "External id": 2944471,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773083.021, "dur": 2.547, + "args": { + "External id": 2944472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773087.377, "dur": 0.434, + "args": { + "External id": 2944473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773089.461, "dur": 0.344, + "args": { + "External id": 2944474,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773091.560, "dur": 0.428, + "args": { + "External id": 2944475,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773093.608, "dur": 0.390, + "args": { + "External id": 2944476,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773095.639, "dur": 0.405, + "args": { + "External id": 2944477,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016773110.549, "dur": 50.746, + "args": { + "External id": 2944478,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016773196.239, "dur": 120.141, + "args": { + "External id": 2944479,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016773206.736, "dur": 4.187, + "args": { + "External id": 2944480,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016773222.869, "dur": 10.735, + "args": { + "External id": 2944481,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016773227.260, "dur": 5.878, + "args": { + "External id": 2944482,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773230.983, "dur": 0.693, + "args": { + "External id": 2944483,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016773244.161, "dur": 27.055, + "args": { + "External id": 2944484,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773246.594, "dur": 2.552, + "args": { + "External id": 2944485,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773250.920, "dur": 0.602, + "args": { + "External id": 2944486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773253.182, "dur": 0.431, + "args": { + "External id": 2944487,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773254.988, "dur": 0.382, + "args": { + "External id": 2944488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773256.965, "dur": 0.337, + "args": { + "External id": 2944489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773258.882, "dur": 0.375, + "args": { + "External id": 2944490,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773260.954, "dur": 0.681, + "args": { + "External id": 2944491,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773263.106, "dur": 0.469, + "args": { + "External id": 2944492,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016773265.082, "dur": 2.702, + "args": { + "External id": 2944493,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016773285.215, "dur": 23.479, + "args": { + "External id": 2944494,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016773364.219, "dur": 107.855, + "args": { + "External id": 2944495,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016773385.539, "dur": 82.819, + "args": { + "External id": 2944496,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3567, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016773394.603, "dur": 68.944, + "args": { + "External id": 2944497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016773485.922, "dur": 2.025, + "args": { + "External id": 2944498,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3569, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016773562.773, "dur": 1721.984, + "args": { + "External id": 2944499,"Sequence number": 29294590, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3570 + } + }, + { + "ph": "f", "id": 198, "pid": 1336755, "tid": 1381158, "ts": 1555016773562.773, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016773670.806, "dur": 105.974, + "args": { + "External id": 2944500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016773815.220, "dur": 41.780, + "args": { + "External id": 2944501,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016773876.033, "dur": 49.796, + "args": { + "External id": 2944502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016773935.715, "dur": 32.625, + "args": { + "External id": 2944503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016773975.356, "dur": 92.840, + "args": { + "External id": 2944504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774080.150, "dur": 31.323, + "args": { + "External id": 2944505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774121.234, "dur": 56.073, + "args": { + "External id": 2944506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016774203.208, "dur": 25.930, + "args": { + "External id": 2944507,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016774247.048, "dur": 29.829, + "args": { + "External id": 2944508,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016774296.361, "dur": 19.270, + "args": { + "External id": 2944509,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016774327.983, "dur": 16.962, + "args": { + "External id": 2944510,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774355.125, "dur": 32.022, + "args": { + "External id": 2944511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774390.661, "dur": 33.677, + "args": { + "External id": 2944512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016774480.295, "dur": 171.902, + "args": { + "External id": 2944513,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016774555.425, "dur": 5.767, + "args": { + "External id": 2944514,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016774563.387, "dur": 2.160, + "args": { + "External id": 2944515,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016774683.638, "dur": 23.819, + "args": { + "External id": 2944516,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016774718.965, "dur": 15.693, + "args": { + "External id": 2944517,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774746.987, "dur": 37.467, + "args": { + "External id": 2944518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774793.471, "dur": 36.864, + "args": { + "External id": 2944519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774836.870, "dur": 21.629, + "args": { + "External id": 2944520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774864.105, "dur": 48.722, + "args": { + "External id": 2944521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774925.057, "dur": 25.305, + "args": { + "External id": 2944522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016774960.706, "dur": 70.151, + "args": { + "External id": 2944523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016775052.630, "dur": 25.191, + "args": { + "External id": 2944524,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016775104.522, "dur": 29.261, + "args": { + "External id": 2944525,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016775164.405, "dur": 21.012, + "args": { + "External id": 2944526,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016775205.909, "dur": 15.945, + "args": { + "External id": 2944527,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016775234.002, "dur": 18.912, + "args": { + "External id": 2944528,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775327.224, "dur": 19.480, + "args": { + "External id": 2944529,"Record function id": 0, "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775330.880, "dur": 15.002, + "args": { + "External id": 2944530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775337.508, "dur": 7.463, + "args": { + "External id": 2944531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775339.311, "dur": 5.524, + "args": { + "External id": 2944532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775350.616, "dur": 5.723, + "args": { + "External id": 2944533,"Record function id": 0, "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775352.385, "dur": 3.531, + "args": { + "External id": 2944534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775353.293, "dur": 2.005, + "args": { + "External id": 2944535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775353.976, "dur": 1.230, + "args": { + "External id": 2944536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775359.644, "dur": 4.557, + "args": { + "External id": 2944537,"Record function id": 0, "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775360.999, "dur": 2.789, + "args": { + "External id": 2944538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775361.935, "dur": 1.327, + "args": { + "External id": 2944539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775362.252, "dur": 0.929, + "args": { + "External id": 2944540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775367.460, "dur": 4.288, + "args": { + "External id": 2944541,"Record function id": 0, "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775368.911, "dur": 2.431, + "args": { + "External id": 2944542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775369.647, "dur": 1.168, + "args": { + "External id": 2944543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775370.121, "dur": 0.586, + "args": { + "External id": 2944544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775374.860, "dur": 4.070, + "args": { + "External id": 2944545,"Record function id": 0, "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775376.100, "dur": 2.407, + "args": { + "External id": 2944546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775376.715, "dur": 1.292, + "args": { + "External id": 2944547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775377.224, "dur": 0.659, + "args": { + "External id": 2944548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775382.154, "dur": 4.141, + "args": { + "External id": 2944549,"Record function id": 0, "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775383.259, "dur": 2.615, + "args": { + "External id": 2944550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775383.902, "dur": 1.431, + "args": { + "External id": 2944551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775384.555, "dur": 0.609, + "args": { + "External id": 2944552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775389.595, "dur": 6.484, + "args": { + "External id": 2944553,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775390.469, "dur": 5.186, + "args": { + "External id": 2944554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775390.968, "dur": 4.128, + "args": { + "External id": 2944555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775391.237, "dur": 3.754, + "args": { + "External id": 2944556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775399.195, "dur": 6.343, + "args": { + "External id": 2944557,"Record function id": 0, "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775400.833, "dur": 4.249, + "args": { + "External id": 2944558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775401.298, "dur": 3.377, + "args": { + "External id": 2944559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775401.808, "dur": 2.780, + "args": { + "External id": 2944560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775408.664, "dur": 4.357, + "args": { + "External id": 2944561,"Record function id": 0, "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016775409.752, "dur": 2.821, + "args": { + "External id": 2944562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775410.597, "dur": 1.424, + "args": { + "External id": 2944563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016775411.240, "dur": 0.688, + "args": { + "External id": 2944564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016775416.884, "dur": 35943.553, + "args": { + "External id": 2944565,"Record function id": 0, "Sequence number": 29294589, "Fwd thread id": 1, "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016775418.254, "dur": 35934.086, + "args": { + "External id": 2944566,"Sequence number": 29294589, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3637 + } + }, + { + "ph": "f", "id": 199, "pid": 1336755, "tid": 1381158, "ts": 1555016775418.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 1336755, "tid": 1381158, + "ts": 1555016775448.560, "dur": 39.786, + "args": { + "External id": 2944567,"Record function id": 0, "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 1336755, "tid": 1381158, + "ts": 1555016775495.790, "dur": 64.325, + "args": { + "External id": 2944568,"Record function id": 0, "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 1336755, "tid": 1381158, + "ts": 1555016775565.542, "dur": 35779.146, + "args": { + "External id": 2944569,"Record function id": 0, "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016775650.437, "dur": 5.903, + "args": { + "External id": 2944570,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016775665.605, "dur": 4.521, + "args": { + "External id": 2944571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016775683.985, "dur": 34812.924, + "args": { + "External id": 2944572,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016775696.351, "dur": 34792.044, + "args": { + "External id": 2944573,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016775740.459, "dur": 12.750, + "args": { + "External id": 2944574,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016775759.394, "dur": 34689.969, + "args": { + "External id": 2944575,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016775761.886, "dur": 34686.691, + "args": { + "External id": 2944576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016775765.967, "dur": 5.593, + "args": { + "External id": 2944577,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016775773.650, "dur": 34671.435, + "args": { + "External id": 2944578,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016810584.180, "dur": 9.138, + "args": { + "External id": 2944579,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016810587.410, "dur": 5.576, + "args": { + "External id": 2944580,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016810623.224, "dur": 406.785, + "args": { + "External id": 2944581,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016810649.030, "dur": 375.900, + "args": { + "External id": 2944582,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3653, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016810659.993, "dur": 358.690, + "args": { + "External id": 2944583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016811052.540, "dur": 2.481, + "args": { + "External id": 2944584,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3655, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811116.521, "dur": 6.423, + "args": { + "External id": 2944585,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811185.826, "dur": 2.082, + "args": { + "External id": 2944586,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811204.930, "dur": 0.931, + "args": { + "External id": 2944587,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811219.425, "dur": 1.208, + "args": { + "External id": 2944588,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811231.705, "dur": 0.956, + "args": { + "External id": 2944589,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811245.965, "dur": 0.858, + "args": { + "External id": 2944590,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811258.069, "dur": 1.140, + "args": { + "External id": 2944591,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811270.070, "dur": 2.165, + "args": { + "External id": 2944592,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811283.065, "dur": 0.804, + "args": { + "External id": 2944593,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016811376.668, "dur": 2753.549, + "args": { + "External id": 2944594,"Record function id": 0, "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 1336755, "tid": 1381158, + "ts": 1555016811396.117, "dur": 1023.349, + "args": { + "External id": 2944595,"Record function id": 0, "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 1336755, "tid": 1381158, + "ts": 1555016811411.323, "dur": 314.873, + "args": { + "External id": 2944596,"Record function id": 0, "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811493.305, "dur": 3.829, + "args": { + "External id": 2944597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811500.429, "dur": 1.480, + "args": { + "External id": 2944598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811503.805, "dur": 1.343, + "args": { + "External id": 2944599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811506.885, "dur": 1.356, + "args": { + "External id": 2944600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811509.812, "dur": 1.145, + "args": { + "External id": 2944601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811512.748, "dur": 0.937, + "args": { + "External id": 2944602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811515.156, "dur": 1.720, + "args": { + "External id": 2944603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811518.186, "dur": 2.999, + "args": { + "External id": 2944604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811522.733, "dur": 1.001, + "args": { + "External id": 2944605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016811525.233, "dur": 0.869, + "args": { + "External id": 2944606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016811543.502, "dur": 152.781, + "args": { + "External id": 2944607,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016811559.338, "dur": 132.882, + "args": { + "External id": 2944608,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016811580.230, "dur": 12.663, + "args": { + "External id": 2944609,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016811595.890, "dur": 66.177, + "args": { + "External id": 2944610,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016811598.478, "dur": 63.238, + "args": { + "External id": 2944611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811602.434, "dur": 5.109, + "args": { + "External id": 2944612,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016811609.182, "dur": 51.786, + "args": { + "External id": 2944613,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 1336755, "tid": 1381158, + "ts": 1555016811816.461, "dur": 595.617, + "args": { + "External id": 2944614,"Record function id": 0, "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 1336755, "tid": 1381158, + "ts": 1555016811832.429, "dur": 567.386, + "args": { + "External id": 2944615,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016811894.711, "dur": 4.806, + "args": { + "External id": 2944616,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016811914.006, "dur": 28.816, + "args": { + "External id": 2944617,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811918.623, "dur": 1.326, + "args": { + "External id": 2944618,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811921.893, "dur": 0.462, + "args": { + "External id": 2944619,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811924.301, "dur": 2.443, + "args": { + "External id": 2944620,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811928.738, "dur": 0.501, + "args": { + "External id": 2944621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811930.781, "dur": 0.639, + "args": { + "External id": 2944622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811932.856, "dur": 0.557, + "args": { + "External id": 2944623,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811934.976, "dur": 0.329, + "args": { + "External id": 2944624,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811937.292, "dur": 0.375, + "args": { + "External id": 2944625,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016811939.075, "dur": 0.389, + "args": { + "External id": 2944626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016811952.835, "dur": 69.529, + "args": { + "External id": 2944627,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016812056.768, "dur": 123.874, + "args": { + "External id": 2944628,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016812067.588, "dur": 4.944, + "args": { + "External id": 2944629,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016812078.212, "dur": 12.894, + "args": { + "External id": 2944630,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016812082.438, "dur": 8.265, + "args": { + "External id": 2944631,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812086.344, "dur": 2.755, + "args": { + "External id": 2944632,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016812098.329, "dur": 24.494, + "args": { + "External id": 2944633,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812100.705, "dur": 0.433, + "args": { + "External id": 2944634,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812102.806, "dur": 0.352, + "args": { + "External id": 2944635,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812105.320, "dur": 0.410, + "args": { + "External id": 2944636,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812107.614, "dur": 0.324, + "args": { + "External id": 2944637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812109.601, "dur": 0.574, + "args": { + "External id": 2944638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812111.524, "dur": 0.418, + "args": { + "External id": 2944639,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812113.452, "dur": 0.706, + "args": { + "External id": 2944640,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812115.533, "dur": 2.197, + "args": { + "External id": 2944641,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016812119.281, "dur": 0.322, + "args": { + "External id": 2944642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016812132.891, "dur": 38.876, + "args": { + "External id": 2944643,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016812229.875, "dur": 107.980, + "args": { + "External id": 2944644,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016812252.515, "dur": 81.904, + "args": { + "External id": 2944645,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3716, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016812262.031, "dur": 68.246, + "args": { + "External id": 2944646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016812350.906, "dur": 1.887, + "args": { + "External id": 2944647,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3718, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016812425.981, "dur": 1685.263, + "args": { + "External id": 2944648,"Sequence number": 29294588, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3719 + } + }, + { + "ph": "f", "id": 200, "pid": 1336755, "tid": 1381158, "ts": 1555016812425.981, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016812533.130, "dur": 102.137, + "args": { + "External id": 2944649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016812671.676, "dur": 39.787, + "args": { + "External id": 2944650,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016812729.247, "dur": 48.704, + "args": { + "External id": 2944651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016812787.449, "dur": 32.492, + "args": { + "External id": 2944652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016812826.661, "dur": 45.872, + "args": { + "External id": 2944653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016812880.233, "dur": 27.318, + "args": { + "External id": 2944654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016812916.111, "dur": 42.541, + "args": { + "External id": 2944655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016812979.994, "dur": 67.432, + "args": { + "External id": 2944656,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016813071.417, "dur": 33.496, + "args": { + "External id": 2944657,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016813129.126, "dur": 35.350, + "args": { + "External id": 2944658,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016813181.443, "dur": 18.600, + "args": { + "External id": 2944659,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813211.368, "dur": 34.897, + "args": { + "External id": 2944660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813250.071, "dur": 34.779, + "args": { + "External id": 2944661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016813310.946, "dur": 176.373, + "args": { + "External id": 2944662,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016813388.741, "dur": 6.162, + "args": { + "External id": 2944663,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016813397.108, "dur": 2.438, + "args": { + "External id": 2944664,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016813518.515, "dur": 26.317, + "args": { + "External id": 2944665,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016813567.999, "dur": 17.216, + "args": { + "External id": 2944666,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813595.689, "dur": 36.666, + "args": { + "External id": 2944667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813639.624, "dur": 37.590, + "args": { + "External id": 2944668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813684.273, "dur": 22.731, + "args": { + "External id": 2944669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813712.390, "dur": 31.753, + "args": { + "External id": 2944670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813752.057, "dur": 24.397, + "args": { + "External id": 2944671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016813797.517, "dur": 46.644, + "args": { + "External id": 2944672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016813863.486, "dur": 23.389, + "args": { + "External id": 2944673,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016813903.384, "dur": 26.519, + "args": { + "External id": 2944674,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016813946.573, "dur": 17.742, + "args": { + "External id": 2944675,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016814019.062, "dur": 23.379, + "args": { + "External id": 2944676,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016814060.877, "dur": 18.906, + "args": { + "External id": 2944677,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814166.489, "dur": 16.540, + "args": { + "External id": 2944678,"Record function id": 0, "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814170.605, "dur": 11.211, + "args": { + "External id": 2944679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814174.290, "dur": 6.151, + "args": { + "External id": 2944680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814176.238, "dur": 3.934, + "args": { + "External id": 2944681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814187.632, "dur": 5.277, + "args": { + "External id": 2944682,"Record function id": 0, "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814189.149, "dur": 3.280, + "args": { + "External id": 2944683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814189.902, "dur": 1.905, + "args": { + "External id": 2944684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814190.502, "dur": 1.187, + "args": { + "External id": 2944685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814196.158, "dur": 3.894, + "args": { + "External id": 2944686,"Record function id": 0, "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814197.138, "dur": 2.510, + "args": { + "External id": 2944687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814197.724, "dur": 1.362, + "args": { + "External id": 2944688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814198.134, "dur": 0.874, + "args": { + "External id": 2944689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814203.241, "dur": 3.676, + "args": { + "External id": 2944690,"Record function id": 0, "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814204.311, "dur": 2.173, + "args": { + "External id": 2944691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814205.015, "dur": 1.063, + "args": { + "External id": 2944692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814205.368, "dur": 0.646, + "args": { + "External id": 2944693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814209.969, "dur": 4.318, + "args": { + "External id": 2944694,"Record function id": 0, "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814211.253, "dur": 2.609, + "args": { + "External id": 2944695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814211.891, "dur": 1.556, + "args": { + "External id": 2944696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814212.354, "dur": 1.025, + "args": { + "External id": 2944697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814217.382, "dur": 3.918, + "args": { + "External id": 2944698,"Record function id": 0, "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814218.729, "dur": 2.138, + "args": { + "External id": 2944699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814219.223, "dur": 1.225, + "args": { + "External id": 2944700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814219.667, "dur": 0.656, + "args": { + "External id": 2944701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814224.527, "dur": 6.064, + "args": { + "External id": 2944702,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814225.968, "dur": 4.143, + "args": { + "External id": 2944703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814226.429, "dur": 3.066, + "args": { + "External id": 2944704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814226.755, "dur": 2.640, + "args": { + "External id": 2944705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814233.678, "dur": 3.850, + "args": { + "External id": 2944706,"Record function id": 0, "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814234.850, "dur": 2.263, + "args": { + "External id": 2944707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814235.354, "dur": 1.245, + "args": { + "External id": 2944708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814235.826, "dur": 0.698, + "args": { + "External id": 2944709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814240.642, "dur": 4.084, + "args": { + "External id": 2944710,"Record function id": 0, "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016814241.911, "dur": 2.368, + "args": { + "External id": 2944711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814242.446, "dur": 1.419, + "args": { + "External id": 2944712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016814243.046, "dur": 0.671, + "args": { + "External id": 2944713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016814248.578, "dur": 36664.681, + "args": { + "External id": 2944714,"Record function id": 0, "Sequence number": 29294587, "Fwd thread id": 1, "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016814250.179, "dur": 36655.310, + "args": { + "External id": 2944715,"Sequence number": 29294587, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3786 + } + }, + { + "ph": "f", "id": 201, "pid": 1336755, "tid": 1381158, "ts": 1555016814250.179, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 1336755, "tid": 1381158, + "ts": 1555016814279.441, "dur": 36.340, + "args": { + "External id": 2944716,"Record function id": 0, "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 1336755, "tid": 1381158, + "ts": 1555016814323.510, "dur": 66.560, + "args": { + "External id": 2944717,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 1336755, "tid": 1381158, + "ts": 1555016814396.122, "dur": 36501.771, + "args": { + "External id": 2944718,"Record function id": 0, "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016814483.353, "dur": 6.564, + "args": { + "External id": 2944719,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016814499.040, "dur": 4.449, + "args": { + "External id": 2944720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016814517.782, "dur": 35575.292, + "args": { + "External id": 2944721,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016814533.689, "dur": 35551.138, + "args": { + "External id": 2944722,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016814578.490, "dur": 12.575, + "args": { + "External id": 2944723,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016814597.219, "dur": 35446.895, + "args": { + "External id": 2944724,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016814599.711, "dur": 35443.781, + "args": { + "External id": 2944725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016814603.577, "dur": 5.361, + "args": { + "External id": 2944726,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016814610.775, "dur": 35429.428, + "args": { + "External id": 2944727,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016850193.579, "dur": 9.042, + "args": { + "External id": 2944728,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016850196.662, "dur": 5.445, + "args": { + "External id": 2944729,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016850232.026, "dur": 367.636, + "args": { + "External id": 2944730,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016850257.899, "dur": 336.920, + "args": { + "External id": 2944731,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3802, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016850269.289, "dur": 319.613, + "args": { + "External id": 2944732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016850619.194, "dur": 2.495, + "args": { + "External id": 2944733,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3804, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850681.085, "dur": 8.221, + "args": { + "External id": 2944734,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850733.122, "dur": 1.359, + "args": { + "External id": 2944735,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850751.718, "dur": 0.923, + "args": { + "External id": 2944736,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850766.340, "dur": 0.862, + "args": { + "External id": 2944737,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850779.826, "dur": 2.816, + "args": { + "External id": 2944738,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850795.142, "dur": 0.780, + "args": { + "External id": 2944739,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850809.490, "dur": 0.952, + "args": { + "External id": 2944740,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850822.860, "dur": 2.089, + "args": { + "External id": 2944741,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016850836.562, "dur": 3.139, + "args": { + "External id": 2944742,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016850926.995, "dur": 2755.640, + "args": { + "External id": 2944743,"Record function id": 0, "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 1336755, "tid": 1381158, + "ts": 1555016850944.545, "dur": 1018.287, + "args": { + "External id": 2944744,"Record function id": 0, "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 1336755, "tid": 1381158, + "ts": 1555016850958.134, "dur": 373.666, + "args": { + "External id": 2944745,"Record function id": 0, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851078.617, "dur": 4.704, + "args": { + "External id": 2944746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851086.441, "dur": 1.197, + "args": { + "External id": 2944747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851089.325, "dur": 1.141, + "args": { + "External id": 2944748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851091.991, "dur": 1.478, + "args": { + "External id": 2944749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851095.234, "dur": 0.716, + "args": { + "External id": 2944750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851097.558, "dur": 1.012, + "args": { + "External id": 2944751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851100.011, "dur": 4.020, + "args": { + "External id": 2944752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851105.509, "dur": 0.991, + "args": { + "External id": 2944753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851108.058, "dur": 1.141, + "args": { + "External id": 2944754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016851110.681, "dur": 0.712, + "args": { + "External id": 2944755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016851133.622, "dur": 165.470, + "args": { + "External id": 2944756,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016851164.257, "dur": 130.357, + "args": { + "External id": 2944757,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016851185.864, "dur": 11.849, + "args": { + "External id": 2944758,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016851201.063, "dur": 65.544, + "args": { + "External id": 2944759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016851203.699, "dur": 62.555, + "args": { + "External id": 2944760,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851207.784, "dur": 5.484, + "args": { + "External id": 2944761,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016851215.223, "dur": 50.227, + "args": { + "External id": 2944762,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 1336755, "tid": 1381158, + "ts": 1555016851426.214, "dur": 529.417, + "args": { + "External id": 2944763,"Record function id": 0, "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 1336755, "tid": 1381158, + "ts": 1555016851444.023, "dur": 500.010, + "args": { + "External id": 2944764,"Record function id": 0, "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016851508.402, "dur": 4.641, + "args": { + "External id": 2944765,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016851526.707, "dur": 29.119, + "args": { + "External id": 2944766,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851531.522, "dur": 1.468, + "args": { + "External id": 2944767,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851535.449, "dur": 2.351, + "args": { + "External id": 2944768,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851539.468, "dur": 0.596, + "args": { + "External id": 2944769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851541.947, "dur": 0.349, + "args": { + "External id": 2944770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851543.562, "dur": 0.369, + "args": { + "External id": 2944771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851545.424, "dur": 0.402, + "args": { + "External id": 2944772,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851547.467, "dur": 0.407, + "args": { + "External id": 2944773,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851549.359, "dur": 0.401, + "args": { + "External id": 2944774,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851551.499, "dur": 0.381, + "args": { + "External id": 2944775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016851565.626, "dur": 33.118, + "args": { + "External id": 2944776,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016851629.006, "dur": 100.906, + "args": { + "External id": 2944777,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016851638.868, "dur": 5.346, + "args": { + "External id": 2944778,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016851649.787, "dur": 9.592, + "args": { + "External id": 2944779,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016851653.677, "dur": 5.300, + "args": { + "External id": 2944780,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851657.211, "dur": 0.576, + "args": { + "External id": 2944781,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016851666.342, "dur": 24.577, + "args": { + "External id": 2944782,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851669.065, "dur": 0.656, + "args": { + "External id": 2944783,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851671.524, "dur": 0.594, + "args": { + "External id": 2944784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851673.646, "dur": 0.382, + "args": { + "External id": 2944785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851675.706, "dur": 0.451, + "args": { + "External id": 2944786,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851677.595, "dur": 0.531, + "args": { + "External id": 2944787,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851679.718, "dur": 0.339, + "args": { + "External id": 2944788,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851682.023, "dur": 2.123, + "args": { + "External id": 2944789,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851685.685, "dur": 0.583, + "args": { + "External id": 2944790,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016851688.042, "dur": 0.389, + "args": { + "External id": 2944791,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016851703.521, "dur": 19.461, + "args": { + "External id": 2944792,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016851773.917, "dur": 107.670, + "args": { + "External id": 2944793,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016851794.133, "dur": 84.232, + "args": { + "External id": 2944794,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3865, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016851802.783, "dur": 71.838, + "args": { + "External id": 2944795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016851897.042, "dur": 1.591, + "args": { + "External id": 2944796,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3867, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016851968.996, "dur": 1693.214, + "args": { + "External id": 2944797,"Sequence number": 29294586, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3868 + } + }, + { + "ph": "f", "id": 202, "pid": 1336755, "tid": 1381158, "ts": 1555016851968.996, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852119.667, "dur": 122.541, + "args": { + "External id": 2944798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016852282.293, "dur": 41.582, + "args": { + "External id": 2944799,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852342.730, "dur": 52.804, + "args": { + "External id": 2944800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852404.996, "dur": 34.189, + "args": { + "External id": 2944801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852449.070, "dur": 47.317, + "args": { + "External id": 2944802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852506.493, "dur": 28.832, + "args": { + "External id": 2944803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852542.935, "dur": 44.184, + "args": { + "External id": 2944804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016852607.904, "dur": 23.035, + "args": { + "External id": 2944805,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016852647.329, "dur": 32.306, + "args": { + "External id": 2944806,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016852699.261, "dur": 20.518, + "args": { + "External id": 2944807,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016852734.011, "dur": 17.032, + "args": { + "External id": 2944808,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852760.301, "dur": 27.913, + "args": { + "External id": 2944809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016852791.125, "dur": 32.805, + "args": { + "External id": 2944810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016852849.463, "dur": 217.488, + "args": { + "External id": 2944811,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016852925.426, "dur": 6.042, + "args": { + "External id": 2944812,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016852933.388, "dur": 4.437, + "args": { + "External id": 2944813,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016853102.762, "dur": 26.316, + "args": { + "External id": 2944814,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016853140.592, "dur": 33.436, + "args": { + "External id": 2944815,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016853185.682, "dur": 43.866, + "args": { + "External id": 2944816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016853236.936, "dur": 38.465, + "args": { + "External id": 2944817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016853282.455, "dur": 21.915, + "args": { + "External id": 2944818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016853312.203, "dur": 32.689, + "args": { + "External id": 2944819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016853352.895, "dur": 38.107, + "args": { + "External id": 2944820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016853404.589, "dur": 38.837, + "args": { + "External id": 2944821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016853462.794, "dur": 26.121, + "args": { + "External id": 2944822,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016853505.708, "dur": 26.145, + "args": { + "External id": 2944823,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016853549.405, "dur": 18.650, + "args": { + "External id": 2944824,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016853580.925, "dur": 15.224, + "args": { + "External id": 2944825,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016853614.024, "dur": 17.620, + "args": { + "External id": 2944826,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853703.285, "dur": 17.729, + "args": { + "External id": 2944827,"Record function id": 0, "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853706.451, "dur": 13.691, + "args": { + "External id": 2944828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853710.302, "dur": 9.093, + "args": { + "External id": 2944829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853712.206, "dur": 7.027, + "args": { + "External id": 2944830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853724.639, "dur": 5.022, + "args": { + "External id": 2944831,"Record function id": 0, "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853725.955, "dur": 3.144, + "args": { + "External id": 2944832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853726.587, "dur": 2.007, + "args": { + "External id": 2944833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853727.442, "dur": 1.053, + "args": { + "External id": 2944834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853732.801, "dur": 4.043, + "args": { + "External id": 2944835,"Record function id": 0, "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853733.898, "dur": 2.531, + "args": { + "External id": 2944836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853734.433, "dur": 1.456, + "args": { + "External id": 2944837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853734.980, "dur": 0.837, + "args": { + "External id": 2944838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853739.964, "dur": 7.020, + "args": { + "External id": 2944839,"Record function id": 0, "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853741.376, "dur": 5.152, + "args": { + "External id": 2944840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853742.363, "dur": 3.591, + "args": { + "External id": 2944841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853742.660, "dur": 3.230, + "args": { + "External id": 2944842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853750.080, "dur": 4.069, + "args": { + "External id": 2944843,"Record function id": 0, "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853751.148, "dur": 2.571, + "args": { + "External id": 2944844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853751.615, "dur": 1.530, + "args": { + "External id": 2944845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853752.183, "dur": 0.860, + "args": { + "External id": 2944846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853757.353, "dur": 3.639, + "args": { + "External id": 2944847,"Record function id": 0, "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853758.349, "dur": 2.221, + "args": { + "External id": 2944848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853758.810, "dur": 1.203, + "args": { + "External id": 2944849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853759.266, "dur": 0.627, + "args": { + "External id": 2944850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853764.215, "dur": 3.666, + "args": { + "External id": 2944851,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853765.315, "dur": 2.147, + "args": { + "External id": 2944852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853765.769, "dur": 1.267, + "args": { + "External id": 2944853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853766.262, "dur": 0.650, + "args": { + "External id": 2944854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853771.009, "dur": 3.493, + "args": { + "External id": 2944855,"Record function id": 0, "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853772.066, "dur": 2.003, + "args": { + "External id": 2944856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853772.536, "dur": 0.978, + "args": { + "External id": 2944857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853772.802, "dur": 0.648, + "args": { + "External id": 2944858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853777.586, "dur": 21.953, + "args": { + "External id": 2944859,"Record function id": 0, "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016853779.003, "dur": 19.943, + "args": { + "External id": 2944860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853779.715, "dur": 18.812, + "args": { + "External id": 2944861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016853797.607, "dur": 0.803, + "args": { + "External id": 2944862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016853803.375, "dur": 38300.098, + "args": { + "External id": 2944863,"Record function id": 0, "Sequence number": 29294585, "Fwd thread id": 1, "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016853804.737, "dur": 38289.345, + "args": { + "External id": 2944864,"Sequence number": 29294585, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3935 + } + }, + { + "ph": "f", "id": 203, "pid": 1336755, "tid": 1381158, "ts": 1555016853804.737, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 1336755, "tid": 1381158, + "ts": 1555016853834.354, "dur": 35.901, + "args": { + "External id": 2944865,"Record function id": 0, "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 1336755, "tid": 1381158, + "ts": 1555016853877.165, "dur": 62.421, + "args": { + "External id": 2944866,"Record function id": 0, "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 1336755, "tid": 1381158, + "ts": 1555016853945.073, "dur": 38141.230, + "args": { + "External id": 2944867,"Record function id": 0, "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016854085.543, "dur": 7.529, + "args": { + "External id": 2944868,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016854103.563, "dur": 4.713, + "args": { + "External id": 2944869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016854125.086, "dur": 36980.238, + "args": { + "External id": 2944870,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016854137.625, "dur": 36959.189, + "args": { + "External id": 2944871,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016854197.593, "dur": 15.654, + "args": { + "External id": 2944872,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016854219.636, "dur": 36836.588, + "args": { + "External id": 2944873,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016854222.096, "dur": 36833.378, + "args": { + "External id": 2944874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016854226.144, "dur": 5.628, + "args": { + "External id": 2944875,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016854233.520, "dur": 36818.552, + "args": { + "External id": 2944876,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016891206.682, "dur": 9.291, + "args": { + "External id": 2944877,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016891210.002, "dur": 5.452, + "args": { + "External id": 2944878,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016891244.700, "dur": 499.158, + "args": { + "External id": 2944879,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016891271.046, "dur": 467.318, + "args": { + "External id": 2944880,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3951, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016891282.372, "dur": 449.607, + "args": { + "External id": 2944881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016891764.323, "dur": 2.642, + "args": { + "External id": 2944882,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3953, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891827.935, "dur": 6.502, + "args": { + "External id": 2944883,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891885.130, "dur": 1.558, + "args": { + "External id": 2944884,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891904.463, "dur": 1.132, + "args": { + "External id": 2944885,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891920.402, "dur": 0.949, + "args": { + "External id": 2944886,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891932.592, "dur": 0.844, + "args": { + "External id": 2944887,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891944.272, "dur": 0.953, + "args": { + "External id": 2944888,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891957.698, "dur": 0.841, + "args": { + "External id": 2944889,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016891973.421, "dur": 1.964, + "args": { + "External id": 2944890,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892022.460, "dur": 1.673, + "args": { + "External id": 2944891,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016892118.553, "dur": 2743.042, + "args": { + "External id": 2944892,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 1336755, "tid": 1381158, + "ts": 1555016892138.210, "dur": 1034.229, + "args": { + "External id": 2944893,"Record function id": 0, "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 1336755, "tid": 1381158, + "ts": 1555016892170.119, "dur": 320.424, + "args": { + "External id": 2944894,"Record function id": 0, "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892258.007, "dur": 4.413, + "args": { + "External id": 2944895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892265.791, "dur": 1.328, + "args": { + "External id": 2944896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892269.510, "dur": 0.987, + "args": { + "External id": 2944897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892272.049, "dur": 2.786, + "args": { + "External id": 2944898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892276.359, "dur": 1.039, + "args": { + "External id": 2944899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892278.835, "dur": 0.824, + "args": { + "External id": 2944900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892281.252, "dur": 1.847, + "args": { + "External id": 2944901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892284.599, "dur": 0.942, + "args": { + "External id": 2944902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892287.102, "dur": 1.410, + "args": { + "External id": 2944903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016892290.536, "dur": 0.873, + "args": { + "External id": 2944904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016892308.851, "dur": 149.082, + "args": { + "External id": 2944905,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016892324.366, "dur": 129.253, + "args": { + "External id": 2944906,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016892341.800, "dur": 12.406, + "args": { + "External id": 2944907,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016892357.202, "dur": 67.755, + "args": { + "External id": 2944908,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016892359.593, "dur": 65.032, + "args": { + "External id": 2944909,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892363.666, "dur": 7.024, + "args": { + "External id": 2944910,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016892372.878, "dur": 50.847, + "args": { + "External id": 2944911,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 1336755, "tid": 1381158, + "ts": 1555016892581.478, "dur": 581.678, + "args": { + "External id": 2944912,"Record function id": 0, "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 1336755, "tid": 1381158, + "ts": 1555016892598.047, "dur": 536.834, + "args": { + "External id": 2944913,"Record function id": 0, "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016892658.027, "dur": 5.095, + "args": { + "External id": 2944914,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016892677.539, "dur": 27.786, + "args": { + "External id": 2944915,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892682.248, "dur": 1.547, + "args": { + "External id": 2944916,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892685.834, "dur": 0.489, + "args": { + "External id": 2944917,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892687.807, "dur": 0.606, + "args": { + "External id": 2944918,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892690.047, "dur": 0.400, + "args": { + "External id": 2944919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892691.928, "dur": 0.445, + "args": { + "External id": 2944920,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892693.881, "dur": 0.457, + "args": { + "External id": 2944921,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892695.826, "dur": 2.440, + "args": { + "External id": 2944922,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892699.635, "dur": 0.411, + "args": { + "External id": 2944923,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892701.786, "dur": 0.473, + "args": { + "External id": 2944924,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016892715.034, "dur": 32.698, + "args": { + "External id": 2944925,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016892777.660, "dur": 99.856, + "args": { + "External id": 2944926,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016892786.447, "dur": 3.467, + "args": { + "External id": 2944927,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016892795.795, "dur": 9.966, + "args": { + "External id": 2944928,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016892799.749, "dur": 5.612, + "args": { + "External id": 2944929,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892803.682, "dur": 0.526, + "args": { + "External id": 2944930,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016892812.837, "dur": 27.552, + "args": { + "External id": 2944931,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892815.172, "dur": 0.687, + "args": { + "External id": 2944932,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892817.559, "dur": 0.582, + "args": { + "External id": 2944933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892819.947, "dur": 0.579, + "args": { + "External id": 2944934,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892821.791, "dur": 2.421, + "args": { + "External id": 2944935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892825.556, "dur": 0.586, + "args": { + "External id": 2944936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892827.719, "dur": 0.399, + "args": { + "External id": 2944937,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892829.500, "dur": 0.370, + "args": { + "External id": 2944938,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892831.703, "dur": 0.414, + "args": { + "External id": 2944939,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016892833.725, "dur": 0.338, + "args": { + "External id": 2944940,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016892850.729, "dur": 20.009, + "args": { + "External id": 2944941,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016892922.428, "dur": 144.654, + "args": { + "External id": 2944942,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016892942.036, "dur": 120.989, + "args": { + "External id": 2944943,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4014, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016892950.876, "dur": 107.491, + "args": { + "External id": 2944944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016893081.440, "dur": 1.693, + "args": { + "External id": 2944945,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4016, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016893180.340, "dur": 1660.976, + "args": { + "External id": 2944946,"Sequence number": 29294584, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4017 + } + }, + { + "ph": "f", "id": 204, "pid": 1336755, "tid": 1381158, "ts": 1555016893180.340, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893291.061, "dur": 105.760, + "args": { + "External id": 2944947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016893436.714, "dur": 41.362, + "args": { + "External id": 2944948,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893496.500, "dur": 53.000, + "args": { + "External id": 2944949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893558.897, "dur": 34.013, + "args": { + "External id": 2944950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893601.682, "dur": 48.476, + "args": { + "External id": 2944951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893657.945, "dur": 28.798, + "args": { + "External id": 2944952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893694.196, "dur": 43.445, + "args": { + "External id": 2944953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016893759.824, "dur": 26.282, + "args": { + "External id": 2944954,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016893809.714, "dur": 30.371, + "args": { + "External id": 2944955,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016893859.139, "dur": 23.987, + "args": { + "External id": 2944956,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016893896.375, "dur": 17.748, + "args": { + "External id": 2944957,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893923.185, "dur": 32.467, + "args": { + "External id": 2944958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016893959.107, "dur": 74.517, + "args": { + "External id": 2944959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016894066.724, "dur": 195.050, + "args": { + "External id": 2944960,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016894161.868, "dur": 6.943, + "args": { + "External id": 2944961,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016894170.990, "dur": 2.631, + "args": { + "External id": 2944962,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016894298.494, "dur": 26.893, + "args": { + "External id": 2944963,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016894335.734, "dur": 16.939, + "args": { + "External id": 2944964,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016894361.886, "dur": 47.414, + "args": { + "External id": 2944965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016894416.917, "dur": 41.533, + "args": { + "External id": 2944966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016894466.897, "dur": 22.418, + "args": { + "External id": 2944967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016894495.373, "dur": 48.297, + "args": { + "External id": 2944968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016894556.551, "dur": 30.143, + "args": { + "External id": 2944969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016894594.765, "dur": 35.973, + "args": { + "External id": 2944970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016894648.319, "dur": 25.814, + "args": { + "External id": 2944971,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016894691.269, "dur": 26.350, + "args": { + "External id": 2944972,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016894734.941, "dur": 18.336, + "args": { + "External id": 2944973,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016894768.981, "dur": 15.196, + "args": { + "External id": 2944974,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016894795.598, "dur": 17.936, + "args": { + "External id": 2944975,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894882.761, "dur": 14.460, + "args": { + "External id": 2944976,"Record function id": 0, "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894886.146, "dur": 10.179, + "args": { + "External id": 2944977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894889.872, "dur": 5.589, + "args": { + "External id": 2944978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894891.678, "dur": 3.577, + "args": { + "External id": 2944979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894901.130, "dur": 5.299, + "args": { + "External id": 2944980,"Record function id": 0, "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894902.409, "dur": 3.555, + "args": { + "External id": 2944981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894903.197, "dur": 2.207, + "args": { + "External id": 2944982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894903.906, "dur": 1.395, + "args": { + "External id": 2944983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894909.724, "dur": 6.715, + "args": { + "External id": 2944984,"Record function id": 0, "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894911.197, "dur": 4.828, + "args": { + "External id": 2944985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894911.870, "dur": 3.756, + "args": { + "External id": 2944986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894912.434, "dur": 3.117, + "args": { + "External id": 2944987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894919.703, "dur": 5.173, + "args": { + "External id": 2944988,"Record function id": 0, "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894921.149, "dur": 3.328, + "args": { + "External id": 2944989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894922.620, "dur": 1.479, + "args": { + "External id": 2944990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894923.286, "dur": 0.709, + "args": { + "External id": 2944991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894927.989, "dur": 4.457, + "args": { + "External id": 2944992,"Record function id": 0, "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894929.411, "dur": 2.625, + "args": { + "External id": 2944993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894930.196, "dur": 1.426, + "args": { + "External id": 2944994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894930.802, "dur": 0.743, + "args": { + "External id": 2944995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894935.567, "dur": 3.962, + "args": { + "External id": 2944996,"Record function id": 0, "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894936.704, "dur": 2.421, + "args": { + "External id": 2944997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894937.214, "dur": 1.309, + "args": { + "External id": 2944998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894937.698, "dur": 0.687, + "args": { + "External id": 2944999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894942.843, "dur": 3.657, + "args": { + "External id": 2945000,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894943.953, "dur": 2.144, + "args": { + "External id": 2945001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894944.529, "dur": 0.993, + "args": { + "External id": 2945002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894944.840, "dur": 0.556, + "args": { + "External id": 2945003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894949.573, "dur": 3.595, + "args": { + "External id": 2945004,"Record function id": 0, "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894950.448, "dur": 2.337, + "args": { + "External id": 2945005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894951.235, "dur": 1.146, + "args": { + "External id": 2945006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894951.604, "dur": 0.706, + "args": { + "External id": 2945007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894956.195, "dur": 3.388, + "args": { + "External id": 2945008,"Record function id": 0, "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016894957.076, "dur": 2.109, + "args": { + "External id": 2945009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894957.597, "dur": 1.189, + "args": { + "External id": 2945010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016894958.057, "dur": 0.604, + "args": { + "External id": 2945011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016894963.292, "dur": 35798.933, + "args": { + "External id": 2945012,"Record function id": 0, "Sequence number": 29294583, "Fwd thread id": 1, "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016894965.144, "dur": 35788.043, + "args": { + "External id": 2945013,"Sequence number": 29294583, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4084 + } + }, + { + "ph": "f", "id": 205, "pid": 1336755, "tid": 1381158, "ts": 1555016894965.144, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 1336755, "tid": 1381158, + "ts": 1555016895025.925, "dur": 37.124, + "args": { + "External id": 2945014,"Record function id": 0, "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 1336755, "tid": 1381158, + "ts": 1555016895070.968, "dur": 84.621, + "args": { + "External id": 2945015,"Record function id": 0, "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 1336755, "tid": 1381158, + "ts": 1555016895164.420, "dur": 35581.045, + "args": { + "External id": 2945016,"Record function id": 0, "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016895253.783, "dur": 6.601, + "args": { + "External id": 2945017,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016895269.973, "dur": 6.486, + "args": { + "External id": 2945018,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016895290.789, "dur": 34628.554, + "args": { + "External id": 2945019,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016895303.123, "dur": 34606.724, + "args": { + "External id": 2945020,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016895371.051, "dur": 13.314, + "args": { + "External id": 2945021,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016895390.567, "dur": 34479.603, + "args": { + "External id": 2945022,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016895392.974, "dur": 34476.276, + "args": { + "External id": 2945023,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016895396.800, "dur": 4.823, + "args": { + "External id": 2945024,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016895424.006, "dur": 34441.557, + "args": { + "External id": 2945025,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016930040.464, "dur": 10.443, + "args": { + "External id": 2945026,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016930044.180, "dur": 6.101, + "args": { + "External id": 2945027,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016930082.246, "dur": 370.619, + "args": { + "External id": 2945028,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016930108.529, "dur": 339.333, + "args": { + "External id": 2945029,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4100, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016930120.369, "dur": 321.726, + "args": { + "External id": 2945030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016930471.995, "dur": 2.358, + "args": { + "External id": 2945031,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4102, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930532.496, "dur": 6.468, + "args": { + "External id": 2945032,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930580.747, "dur": 1.280, + "args": { + "External id": 2945033,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930598.467, "dur": 3.255, + "args": { + "External id": 2945034,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930613.823, "dur": 0.862, + "args": { + "External id": 2945035,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930627.693, "dur": 0.912, + "args": { + "External id": 2945036,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930640.381, "dur": 0.835, + "args": { + "External id": 2945037,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930657.794, "dur": 2.663, + "args": { + "External id": 2945038,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930673.277, "dur": 1.843, + "args": { + "External id": 2945039,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016930685.333, "dur": 0.967, + "args": { + "External id": 2945040,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016930776.067, "dur": 2821.235, + "args": { + "External id": 2945041,"Record function id": 0, "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 1336755, "tid": 1381158, + "ts": 1555016930794.934, "dur": 1037.418, + "args": { + "External id": 2945042,"Record function id": 0, "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 1336755, "tid": 1381158, + "ts": 1555016930811.776, "dur": 377.923, + "args": { + "External id": 2945043,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930895.638, "dur": 4.000, + "args": { + "External id": 2945044,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930902.834, "dur": 1.353, + "args": { + "External id": 2945045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930905.957, "dur": 3.035, + "args": { + "External id": 2945046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930910.454, "dur": 1.018, + "args": { + "External id": 2945047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930913.009, "dur": 1.383, + "args": { + "External id": 2945048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930915.851, "dur": 1.068, + "args": { + "External id": 2945049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930918.177, "dur": 1.605, + "args": { + "External id": 2945050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930921.070, "dur": 0.931, + "args": { + "External id": 2945051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930923.425, "dur": 1.278, + "args": { + "External id": 2945052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016930926.289, "dur": 1.819, + "args": { + "External id": 2945053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016930944.891, "dur": 192.471, + "args": { + "External id": 2945054,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016930959.855, "dur": 172.589, + "args": { + "External id": 2945055,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016930975.120, "dur": 53.447, + "args": { + "External id": 2945056,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016931032.897, "dur": 68.535, + "args": { + "External id": 2945057,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016931035.796, "dur": 65.307, + "args": { + "External id": 2945058,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931039.715, "dur": 6.085, + "args": { + "External id": 2945059,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016931047.940, "dur": 52.704, + "args": { + "External id": 2945060,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 1336755, "tid": 1381158, + "ts": 1555016931286.461, "dur": 538.051, + "args": { + "External id": 2945061,"Record function id": 0, "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 1336755, "tid": 1381158, + "ts": 1555016931302.902, "dur": 510.173, + "args": { + "External id": 2945062,"Record function id": 0, "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016931364.989, "dur": 5.427, + "args": { + "External id": 2945063,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016931385.156, "dur": 33.028, + "args": { + "External id": 2945064,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931390.310, "dur": 1.490, + "args": { + "External id": 2945065,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931393.864, "dur": 0.908, + "args": { + "External id": 2945066,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931396.487, "dur": 0.453, + "args": { + "External id": 2945067,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931398.592, "dur": 0.591, + "args": { + "External id": 2945068,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931400.588, "dur": 0.451, + "args": { + "External id": 2945069,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931402.587, "dur": 2.232, + "args": { + "External id": 2945070,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931410.586, "dur": 0.595, + "args": { + "External id": 2945071,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931412.650, "dur": 0.315, + "args": { + "External id": 2945072,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931414.464, "dur": 0.810, + "args": { + "External id": 2945073,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016931427.727, "dur": 36.801, + "args": { + "External id": 2945074,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016931494.536, "dur": 108.012, + "args": { + "External id": 2945075,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016931504.294, "dur": 2.926, + "args": { + "External id": 2945076,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016931513.430, "dur": 10.529, + "args": { + "External id": 2945077,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016931517.488, "dur": 6.059, + "args": { + "External id": 2945078,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931521.242, "dur": 0.817, + "args": { + "External id": 2945079,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016931534.647, "dur": 27.497, + "args": { + "External id": 2945080,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931536.825, "dur": 0.624, + "args": { + "External id": 2945081,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931538.949, "dur": 0.681, + "args": { + "External id": 2945082,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931541.085, "dur": 2.539, + "args": { + "External id": 2945083,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931545.250, "dur": 0.381, + "args": { + "External id": 2945084,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931546.953, "dur": 0.508, + "args": { + "External id": 2945085,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931548.904, "dur": 0.362, + "args": { + "External id": 2945086,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931550.682, "dur": 0.522, + "args": { + "External id": 2945087,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931552.857, "dur": 0.536, + "args": { + "External id": 2945088,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016931555.219, "dur": 0.358, + "args": { + "External id": 2945089,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016931574.266, "dur": 20.142, + "args": { + "External id": 2945090,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016931645.467, "dur": 105.298, + "args": { + "External id": 2945091,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016931666.125, "dur": 81.288, + "args": { + "External id": 2945092,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4163, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016931675.506, "dur": 68.026, + "args": { + "External id": 2945093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016931767.845, "dur": 1.728, + "args": { + "External id": 2945094,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4165, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016931840.367, "dur": 1735.064, + "args": { + "External id": 2945095,"Sequence number": 29294582, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4166 + } + }, + { + "ph": "f", "id": 206, "pid": 1336755, "tid": 1381158, "ts": 1555016931840.367, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016931947.647, "dur": 146.813, + "args": { + "External id": 2945096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016932136.577, "dur": 60.404, + "args": { + "External id": 2945097,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932217.892, "dur": 58.217, + "args": { + "External id": 2945098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932288.407, "dur": 34.711, + "args": { + "External id": 2945099,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932330.453, "dur": 47.444, + "args": { + "External id": 2945100,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932386.011, "dur": 29.416, + "args": { + "External id": 2945101,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932423.859, "dur": 45.760, + "args": { + "External id": 2945102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016932494.939, "dur": 28.060, + "args": { + "External id": 2945103,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016932550.292, "dur": 32.609, + "args": { + "External id": 2945104,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016932602.809, "dur": 20.972, + "args": { + "External id": 2945105,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016932638.946, "dur": 17.202, + "args": { + "External id": 2945106,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932665.058, "dur": 29.072, + "args": { + "External id": 2945107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016932697.522, "dur": 34.652, + "args": { + "External id": 2945108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016932760.384, "dur": 169.714, + "args": { + "External id": 2945109,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016932834.470, "dur": 6.201, + "args": { + "External id": 2945110,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016932842.746, "dur": 2.339, + "args": { + "External id": 2945111,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016932962.581, "dur": 63.885, + "args": { + "External id": 2945112,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016933041.372, "dur": 18.855, + "args": { + "External id": 2945113,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016933071.681, "dur": 43.546, + "args": { + "External id": 2945114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016933122.232, "dur": 55.945, + "args": { + "External id": 2945115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016933190.484, "dur": 25.849, + "args": { + "External id": 2945116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016933221.898, "dur": 33.199, + "args": { + "External id": 2945117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016933273.143, "dur": 32.976, + "args": { + "External id": 2945118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016933315.838, "dur": 37.579, + "args": { + "External id": 2945119,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016933374.514, "dur": 27.507, + "args": { + "External id": 2945120,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016933419.567, "dur": 25.434, + "args": { + "External id": 2945121,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016933462.711, "dur": 19.480, + "args": { + "External id": 2945122,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016933495.833, "dur": 18.776, + "args": { + "External id": 2945123,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016933526.322, "dur": 17.153, + "args": { + "External id": 2945124,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933618.520, "dur": 18.583, + "args": { + "External id": 2945125,"Record function id": 0, "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933621.614, "dur": 14.598, + "args": { + "External id": 2945126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933629.010, "dur": 6.105, + "args": { + "External id": 2945127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933630.886, "dur": 4.100, + "args": { + "External id": 2945128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933641.150, "dur": 7.551, + "args": { + "External id": 2945129,"Record function id": 0, "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933642.592, "dur": 5.691, + "args": { + "External id": 2945130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933643.410, "dur": 4.367, + "args": { + "External id": 2945131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933644.217, "dur": 3.450, + "args": { + "External id": 2945132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933651.881, "dur": 4.989, + "args": { + "External id": 2945133,"Record function id": 0, "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933653.408, "dur": 3.045, + "args": { + "External id": 2945134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933654.259, "dur": 1.759, + "args": { + "External id": 2945135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933654.727, "dur": 1.181, + "args": { + "External id": 2945136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933660.287, "dur": 4.206, + "args": { + "External id": 2945137,"Record function id": 0, "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933661.681, "dur": 2.413, + "args": { + "External id": 2945138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933662.233, "dur": 1.410, + "args": { + "External id": 2945139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933662.690, "dur": 0.877, + "args": { + "External id": 2945140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933667.594, "dur": 3.396, + "args": { + "External id": 2945141,"Record function id": 0, "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933668.764, "dur": 1.817, + "args": { + "External id": 2945142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933669.225, "dur": 0.951, + "args": { + "External id": 2945143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933669.519, "dur": 0.585, + "args": { + "External id": 2945144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933674.205, "dur": 4.405, + "args": { + "External id": 2945145,"Record function id": 0, "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933675.587, "dur": 2.544, + "args": { + "External id": 2945146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933676.101, "dur": 1.476, + "args": { + "External id": 2945147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933676.646, "dur": 0.826, + "args": { + "External id": 2945148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933681.801, "dur": 4.007, + "args": { + "External id": 2945149,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933683.059, "dur": 2.311, + "args": { + "External id": 2945150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933683.634, "dur": 1.093, + "args": { + "External id": 2945151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933683.902, "dur": 0.737, + "args": { + "External id": 2945152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933688.832, "dur": 4.233, + "args": { + "External id": 2945153,"Record function id": 0, "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933690.223, "dur": 2.448, + "args": { + "External id": 2945154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933691.094, "dur": 1.156, + "args": { + "External id": 2945155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933691.367, "dur": 0.804, + "args": { + "External id": 2945156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933696.224, "dur": 6.338, + "args": { + "External id": 2945157,"Record function id": 0, "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016933697.290, "dur": 4.828, + "args": { + "External id": 2945158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933698.190, "dur": 3.527, + "args": { + "External id": 2945159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016933698.676, "dur": 2.958, + "args": { + "External id": 2945160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016933706.190, "dur": 36006.185, + "args": { + "External id": 2945161,"Record function id": 0, "Sequence number": 29294581, "Fwd thread id": 1, "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016933707.937, "dur": 35996.011, + "args": { + "External id": 2945162,"Sequence number": 29294581, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4233 + } + }, + { + "ph": "f", "id": 207, "pid": 1336755, "tid": 1381158, "ts": 1555016933707.937, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 1336755, "tid": 1381158, + "ts": 1555016933735.023, "dur": 36.016, + "args": { + "External id": 2945163,"Record function id": 0, "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 1336755, "tid": 1381158, + "ts": 1555016933778.388, "dur": 65.784, + "args": { + "External id": 2945164,"Record function id": 0, "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 1336755, "tid": 1381158, + "ts": 1555016933849.692, "dur": 35846.957, + "args": { + "External id": 2945165,"Record function id": 0, "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016933935.931, "dur": 6.134, + "args": { + "External id": 2945166,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016933951.052, "dur": 4.398, + "args": { + "External id": 2945167,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016933972.249, "dur": 34893.253, + "args": { + "External id": 2945168,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016934023.312, "dur": 34833.383, + "args": { + "External id": 2945169,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016934076.517, "dur": 14.007, + "args": { + "External id": 2945170,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016934096.915, "dur": 34716.794, + "args": { + "External id": 2945171,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016934099.575, "dur": 34713.499, + "args": { + "External id": 2945172,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016934103.480, "dur": 5.906, + "args": { + "External id": 2945173,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016934111.445, "dur": 34698.236, + "args": { + "External id": 2945174,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016968955.715, "dur": 8.982, + "args": { + "External id": 2945175,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016968959.004, "dur": 5.329, + "args": { + "External id": 2945176,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016969018.556, "dur": 397.477, + "args": { + "External id": 2945177,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016969044.765, "dur": 366.050, + "args": { + "External id": 2945178,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4249, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016969056.377, "dur": 348.284, + "args": { + "External id": 2945179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016969436.314, "dur": 2.582, + "args": { + "External id": 2945180,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4251, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969497.451, "dur": 6.184, + "args": { + "External id": 2945181,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969545.291, "dur": 3.827, + "args": { + "External id": 2945182,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969564.134, "dur": 1.290, + "args": { + "External id": 2945183,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969577.648, "dur": 0.998, + "args": { + "External id": 2945184,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969590.227, "dur": 1.035, + "args": { + "External id": 2945185,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969602.329, "dur": 2.632, + "args": { + "External id": 2945186,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969616.467, "dur": 0.755, + "args": { + "External id": 2945187,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969629.265, "dur": 0.939, + "args": { + "External id": 2945188,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969640.494, "dur": 0.732, + "args": { + "External id": 2945189,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016969725.651, "dur": 2787.112, + "args": { + "External id": 2945190,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 1336755, "tid": 1381158, + "ts": 1555016969744.709, "dur": 1029.153, + "args": { + "External id": 2945191,"Record function id": 0, "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 1336755, "tid": 1381158, + "ts": 1555016969759.751, "dur": 354.400, + "args": { + "External id": 2945192,"Record function id": 0, "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969841.675, "dur": 6.452, + "args": { + "External id": 2945193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969851.237, "dur": 1.528, + "args": { + "External id": 2945194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969854.639, "dur": 1.241, + "args": { + "External id": 2945195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969857.461, "dur": 1.068, + "args": { + "External id": 2945196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969859.937, "dur": 1.191, + "args": { + "External id": 2945197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969862.639, "dur": 1.084, + "args": { + "External id": 2945198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969865.210, "dur": 1.341, + "args": { + "External id": 2945199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969868.151, "dur": 1.061, + "args": { + "External id": 2945200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969870.864, "dur": 3.040, + "args": { + "External id": 2945201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016969875.480, "dur": 0.959, + "args": { + "External id": 2945202,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016969893.586, "dur": 189.338, + "args": { + "External id": 2945203,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016969909.223, "dur": 168.729, + "args": { + "External id": 2945204,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016969927.201, "dur": 11.702, + "args": { + "External id": 2945205,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016969941.708, "dur": 106.554, + "args": { + "External id": 2945206,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016969944.228, "dur": 103.570, + "args": { + "External id": 2945207,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016969948.083, "dur": 5.366, + "args": { + "External id": 2945208,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016969957.327, "dur": 89.239, + "args": { + "External id": 2945209,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 1336755, "tid": 1381158, + "ts": 1555016970224.519, "dur": 542.612, + "args": { + "External id": 2945210,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 1336755, "tid": 1381158, + "ts": 1555016970244.677, "dur": 510.924, + "args": { + "External id": 2945211,"Record function id": 0, "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016970310.203, "dur": 5.784, + "args": { + "External id": 2945212,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016970330.723, "dur": 30.966, + "args": { + "External id": 2945213,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970335.507, "dur": 1.628, + "args": { + "External id": 2945214,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970339.358, "dur": 1.189, + "args": { + "External id": 2945215,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970342.410, "dur": 0.540, + "args": { + "External id": 2945216,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970344.495, "dur": 2.723, + "args": { + "External id": 2945217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970348.855, "dur": 0.454, + "args": { + "External id": 2945218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970350.882, "dur": 0.675, + "args": { + "External id": 2945219,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970353.087, "dur": 0.412, + "args": { + "External id": 2945220,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970355.037, "dur": 0.380, + "args": { + "External id": 2945221,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970357.014, "dur": 0.340, + "args": { + "External id": 2945222,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016970370.978, "dur": 37.322, + "args": { + "External id": 2945223,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555016970437.809, "dur": 107.048, + "args": { + "External id": 2945224,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016970448.735, "dur": 3.120, + "args": { + "External id": 2945225,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555016970457.400, "dur": 10.062, + "args": { + "External id": 2945226,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555016970461.519, "dur": 5.530, + "args": { + "External id": 2945227,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970465.329, "dur": 0.535, + "args": { + "External id": 2945228,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555016970473.888, "dur": 31.547, + "args": { + "External id": 2945229,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970476.108, "dur": 2.642, + "args": { + "External id": 2945230,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970480.481, "dur": 0.635, + "args": { + "External id": 2945231,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970483.030, "dur": 4.074, + "args": { + "External id": 2945232,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970488.673, "dur": 0.428, + "args": { + "External id": 2945233,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970490.785, "dur": 0.664, + "args": { + "External id": 2945234,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970492.897, "dur": 0.390, + "args": { + "External id": 2945235,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970495.122, "dur": 0.497, + "args": { + "External id": 2945236,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970496.941, "dur": 0.438, + "args": { + "External id": 2945237,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016970498.649, "dur": 2.512, + "args": { + "External id": 2945238,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016970514.540, "dur": 20.278, + "args": { + "External id": 2945239,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555016970587.634, "dur": 103.410, + "args": { + "External id": 2945240,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016970609.128, "dur": 78.659, + "args": { + "External id": 2945241,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4312, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555016970618.123, "dur": 65.052, + "args": { + "External id": 2945242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555016970707.819, "dur": 1.758, + "args": { + "External id": 2945243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4314, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016970780.548, "dur": 1709.212, + "args": { + "External id": 2945244,"Sequence number": 29294580, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4315 + } + }, + { + "ph": "f", "id": 208, "pid": 1336755, "tid": 1381158, "ts": 1555016970780.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016970884.880, "dur": 145.064, + "args": { + "External id": 2945245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016971074.241, "dur": 42.649, + "args": { + "External id": 2945246,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971135.375, "dur": 73.116, + "args": { + "External id": 2945247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971222.583, "dur": 33.794, + "args": { + "External id": 2945248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971263.504, "dur": 45.315, + "args": { + "External id": 2945249,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971315.856, "dur": 27.281, + "args": { + "External id": 2945250,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971353.246, "dur": 40.782, + "args": { + "External id": 2945251,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016971418.354, "dur": 25.243, + "args": { + "External id": 2945252,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555016971461.732, "dur": 30.565, + "args": { + "External id": 2945253,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016971514.500, "dur": 19.673, + "args": { + "External id": 2945254,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016971546.993, "dur": 16.538, + "args": { + "External id": 2945255,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971572.355, "dur": 29.067, + "args": { + "External id": 2945256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971604.528, "dur": 34.380, + "args": { + "External id": 2945257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555016971663.595, "dur": 170.403, + "args": { + "External id": 2945258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016971739.626, "dur": 5.657, + "args": { + "External id": 2945259,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016971747.615, "dur": 2.261, + "args": { + "External id": 2945260,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016971867.361, "dur": 25.720, + "args": { + "External id": 2945261,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555016971904.528, "dur": 17.128, + "args": { + "External id": 2945262,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971929.958, "dur": 34.759, + "args": { + "External id": 2945263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016971973.975, "dur": 79.988, + "args": { + "External id": 2945264,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016972064.295, "dur": 27.754, + "args": { + "External id": 2945265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016972103.789, "dur": 75.647, + "args": { + "External id": 2945266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016972197.886, "dur": 35.107, + "args": { + "External id": 2945267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555016972243.352, "dur": 33.758, + "args": { + "External id": 2945268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555016972296.671, "dur": 24.536, + "args": { + "External id": 2945269,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016972337.672, "dur": 26.619, + "args": { + "External id": 2945270,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555016972380.884, "dur": 18.773, + "args": { + "External id": 2945271,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555016972413.949, "dur": 15.170, + "args": { + "External id": 2945272,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555016972440.720, "dur": 21.319, + "args": { + "External id": 2945273,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972533.737, "dur": 15.798, + "args": { + "External id": 2945274,"Record function id": 0, "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972536.677, "dur": 11.919, + "args": { + "External id": 2945275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972539.908, "dur": 7.763, + "args": { + "External id": 2945276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972541.815, "dur": 5.719, + "args": { + "External id": 2945277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972553.327, "dur": 5.267, + "args": { + "External id": 2945278,"Record function id": 0, "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972554.790, "dur": 3.363, + "args": { + "External id": 2945279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972555.562, "dur": 2.121, + "args": { + "External id": 2945280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972556.313, "dur": 1.264, + "args": { + "External id": 2945281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972561.866, "dur": 4.780, + "args": { + "External id": 2945282,"Record function id": 0, "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972563.280, "dur": 2.903, + "args": { + "External id": 2945283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972563.914, "dur": 1.712, + "args": { + "External id": 2945284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972564.519, "dur": 0.976, + "args": { + "External id": 2945285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972569.786, "dur": 4.315, + "args": { + "External id": 2945286,"Record function id": 0, "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972570.711, "dur": 2.998, + "args": { + "External id": 2945287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972571.809, "dur": 1.430, + "args": { + "External id": 2945288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972572.453, "dur": 0.690, + "args": { + "External id": 2945289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972577.145, "dur": 4.109, + "args": { + "External id": 2945290,"Record function id": 0, "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972578.242, "dur": 2.617, + "args": { + "External id": 2945291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972578.954, "dur": 1.510, + "args": { + "External id": 2945292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972579.654, "dur": 0.674, + "args": { + "External id": 2945293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972584.312, "dur": 7.185, + "args": { + "External id": 2945294,"Record function id": 0, "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972585.335, "dur": 5.657, + "args": { + "External id": 2945295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972586.032, "dur": 4.533, + "args": { + "External id": 2945296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972589.650, "dur": 0.767, + "args": { + "External id": 2945297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972594.731, "dur": 3.663, + "args": { + "External id": 2945298,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972595.930, "dur": 2.031, + "args": { + "External id": 2945299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972596.417, "dur": 0.997, + "args": { + "External id": 2945300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972596.696, "dur": 0.610, + "args": { + "External id": 2945301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972601.437, "dur": 5.513, + "args": { + "External id": 2945302,"Record function id": 0, "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972602.320, "dur": 4.199, + "args": { + "External id": 2945303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972602.790, "dur": 3.351, + "args": { + "External id": 2945304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972603.075, "dur": 2.920, + "args": { + "External id": 2945305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972609.993, "dur": 3.786, + "args": { + "External id": 2945306,"Record function id": 0, "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555016972611.088, "dur": 2.263, + "args": { + "External id": 2945307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972611.544, "dur": 1.286, + "args": { + "External id": 2945308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555016972611.950, "dur": 0.708, + "args": { + "External id": 2945309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016972617.370, "dur": 36490.923, + "args": { + "External id": 2945310,"Record function id": 0, "Sequence number": 29294579, "Fwd thread id": 1, "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555016972618.497, "dur": 36481.466, + "args": { + "External id": 2945311,"Sequence number": 29294579, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4382 + } + }, + { + "ph": "f", "id": 209, "pid": 1336755, "tid": 1381158, "ts": 1555016972618.497, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 1336755, "tid": 1381158, + "ts": 1555016972648.039, "dur": 33.252, + "args": { + "External id": 2945312,"Record function id": 0, "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 1336755, "tid": 1381158, + "ts": 1555016972688.345, "dur": 61.782, + "args": { + "External id": 2945313,"Record function id": 0, "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 1336755, "tid": 1381158, + "ts": 1555016972755.206, "dur": 36337.327, + "args": { + "External id": 2945314,"Record function id": 0, "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016972844.850, "dur": 5.957, + "args": { + "External id": 2945315,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555016972859.710, "dur": 4.607, + "args": { + "External id": 2945316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016972876.806, "dur": 35228.588, + "args": { + "External id": 2945317,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555016972888.818, "dur": 35207.770, + "args": { + "External id": 2945318,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555016972937.187, "dur": 12.893, + "args": { + "External id": 2945319,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555016972959.503, "dur": 35096.612, + "args": { + "External id": 2945320,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555016972962.282, "dur": 35093.158, + "args": { + "External id": 2945321,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555016972966.142, "dur": 4.483, + "args": { + "External id": 2945322,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555016972972.389, "dur": 35079.739, + "args": { + "External id": 2945323,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017008199.622, "dur": 8.507, + "args": { + "External id": 2945324,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017008202.444, "dur": 5.249, + "args": { + "External id": 2945325,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017008236.756, "dur": 520.181, + "args": { + "External id": 2945326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017008265.501, "dur": 486.404, + "args": { + "External id": 2945327,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4398, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017008278.332, "dur": 467.955, + "args": { + "External id": 2945328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017008780.606, "dur": 2.347, + "args": { + "External id": 2945329,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4400, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008841.336, "dur": 6.405, + "args": { + "External id": 2945330,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008890.811, "dur": 1.247, + "args": { + "External id": 2945331,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008907.151, "dur": 1.749, + "args": { + "External id": 2945332,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008920.608, "dur": 1.003, + "args": { + "External id": 2945333,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008932.795, "dur": 1.207, + "args": { + "External id": 2945334,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008946.378, "dur": 1.320, + "args": { + "External id": 2945335,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008957.528, "dur": 1.088, + "args": { + "External id": 2945336,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017008969.245, "dur": 0.926, + "args": { + "External id": 2945337,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009025.018, "dur": 1.853, + "args": { + "External id": 2945338,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017009123.163, "dur": 2677.947, + "args": { + "External id": 2945339,"Record function id": 0, "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 1336755, "tid": 1381158, + "ts": 1555017009154.357, "dur": 1018.508, + "args": { + "External id": 2945340,"Record function id": 0, "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 1336755, "tid": 1381158, + "ts": 1555017009171.363, "dur": 307.295, + "args": { + "External id": 2945341,"Record function id": 0, "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009254.569, "dur": 5.157, + "args": { + "External id": 2945342,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009262.970, "dur": 1.189, + "args": { + "External id": 2945343,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009265.857, "dur": 1.027, + "args": { + "External id": 2945344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009268.547, "dur": 1.058, + "args": { + "External id": 2945345,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009273.361, "dur": 1.255, + "args": { + "External id": 2945346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009276.122, "dur": 1.102, + "args": { + "External id": 2945347,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009278.924, "dur": 0.968, + "args": { + "External id": 2945348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009281.269, "dur": 2.821, + "args": { + "External id": 2945349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009287.029, "dur": 1.022, + "args": { + "External id": 2945350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017009289.622, "dur": 0.682, + "args": { + "External id": 2945351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017009308.000, "dur": 144.241, + "args": { + "External id": 2945352,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017009323.360, "dur": 124.964, + "args": { + "External id": 2945353,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017009339.988, "dur": 11.876, + "args": { + "External id": 2945354,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017009354.584, "dur": 66.303, + "args": { + "External id": 2945355,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017009356.955, "dur": 63.595, + "args": { + "External id": 2945356,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009361.019, "dur": 6.019, + "args": { + "External id": 2945357,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017009370.416, "dur": 49.521, + "args": { + "External id": 2945358,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 1336755, "tid": 1381158, + "ts": 1555017009565.535, "dur": 598.369, + "args": { + "External id": 2945359,"Record function id": 0, "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 1336755, "tid": 1381158, + "ts": 1555017009583.671, "dur": 548.217, + "args": { + "External id": 2945360,"Record function id": 0, "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017009642.515, "dur": 4.160, + "args": { + "External id": 2945361,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017009660.160, "dur": 26.909, + "args": { + "External id": 2945362,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009664.384, "dur": 2.604, + "args": { + "External id": 2945363,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009668.861, "dur": 0.449, + "args": { + "External id": 2945364,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009670.465, "dur": 2.452, + "args": { + "External id": 2945365,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009675.155, "dur": 0.600, + "args": { + "External id": 2945366,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009677.221, "dur": 0.438, + "args": { + "External id": 2945367,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009678.796, "dur": 0.468, + "args": { + "External id": 2945368,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009680.972, "dur": 0.478, + "args": { + "External id": 2945369,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009682.859, "dur": 0.345, + "args": { + "External id": 2945370,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009684.366, "dur": 0.323, + "args": { + "External id": 2945371,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017009696.325, "dur": 32.002, + "args": { + "External id": 2945372,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017009758.161, "dur": 104.617, + "args": { + "External id": 2945373,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017009767.489, "dur": 2.930, + "args": { + "External id": 2945374,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017009775.962, "dur": 11.017, + "args": { + "External id": 2945375,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017009779.780, "dur": 6.806, + "args": { + "External id": 2945376,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009783.205, "dur": 2.255, + "args": { + "External id": 2945377,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017009793.966, "dur": 28.477, + "args": { + "External id": 2945378,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009796.312, "dur": 0.527, + "args": { + "External id": 2945379,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009798.603, "dur": 0.414, + "args": { + "External id": 2945380,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009800.778, "dur": 0.427, + "args": { + "External id": 2945381,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009802.694, "dur": 0.585, + "args": { + "External id": 2945382,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009804.673, "dur": 0.400, + "args": { + "External id": 2945383,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009806.735, "dur": 0.349, + "args": { + "External id": 2945384,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009808.517, "dur": 0.594, + "args": { + "External id": 2945385,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009810.484, "dur": 2.540, + "args": { + "External id": 2945386,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017009814.641, "dur": 0.415, + "args": { + "External id": 2945387,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017009836.027, "dur": 20.044, + "args": { + "External id": 2945388,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017009904.395, "dur": 152.735, + "args": { + "External id": 2945389,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017009934.376, "dur": 118.906, + "args": { + "External id": 2945390,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4461, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017009942.812, "dur": 105.983, + "args": { + "External id": 2945391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017010076.566, "dur": 1.912, + "args": { + "External id": 2945392,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4463, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017010181.121, "dur": 1599.302, + "args": { + "External id": 2945393,"Sequence number": 29294578, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4464 + } + }, + { + "ph": "f", "id": 210, "pid": 1336755, "tid": 1381158, "ts": 1555017010181.121, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010292.130, "dur": 104.162, + "args": { + "External id": 2945394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017010435.781, "dur": 40.621, + "args": { + "External id": 2945395,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010494.599, "dur": 49.729, + "args": { + "External id": 2945396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010553.865, "dur": 32.497, + "args": { + "External id": 2945397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010592.259, "dur": 45.507, + "args": { + "External id": 2945398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010644.259, "dur": 27.339, + "args": { + "External id": 2945399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010680.787, "dur": 41.416, + "args": { + "External id": 2945400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017010745.443, "dur": 25.430, + "args": { + "External id": 2945401,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017010790.456, "dur": 30.507, + "args": { + "External id": 2945402,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017010840.116, "dur": 19.344, + "args": { + "External id": 2945403,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017010873.257, "dur": 14.995, + "args": { + "External id": 2945404,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010898.470, "dur": 29.416, + "args": { + "External id": 2945405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017010931.229, "dur": 35.333, + "args": { + "External id": 2945406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017011030.101, "dur": 186.759, + "args": { + "External id": 2945407,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017011107.681, "dur": 6.340, + "args": { + "External id": 2945408,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017011116.334, "dur": 2.881, + "args": { + "External id": 2945409,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017011255.818, "dur": 24.840, + "args": { + "External id": 2945410,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017011292.890, "dur": 15.714, + "args": { + "External id": 2945411,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017011318.580, "dur": 49.301, + "args": { + "External id": 2945412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017011376.297, "dur": 37.000, + "args": { + "External id": 2945413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017011419.889, "dur": 24.473, + "args": { + "External id": 2945414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017011449.103, "dur": 30.001, + "args": { + "External id": 2945415,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017011486.201, "dur": 20.149, + "args": { + "External id": 2945416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017011514.033, "dur": 29.000, + "args": { + "External id": 2945417,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017011577.077, "dur": 28.086, + "args": { + "External id": 2945418,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017011627.596, "dur": 26.935, + "args": { + "External id": 2945419,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017011669.273, "dur": 17.740, + "args": { + "External id": 2945420,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017011704.439, "dur": 16.926, + "args": { + "External id": 2945421,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017011732.707, "dur": 16.578, + "args": { + "External id": 2945422,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011822.169, "dur": 18.463, + "args": { + "External id": 2945423,"Record function id": 0, "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011825.710, "dur": 14.101, + "args": { + "External id": 2945424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011833.265, "dur": 5.563, + "args": { + "External id": 2945425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011834.750, "dur": 3.960, + "args": { + "External id": 2945426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011844.491, "dur": 4.574, + "args": { + "External id": 2945427,"Record function id": 0, "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011845.940, "dur": 2.675, + "args": { + "External id": 2945428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011846.707, "dur": 1.456, + "args": { + "External id": 2945429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011847.371, "dur": 0.720, + "args": { + "External id": 2945430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011852.354, "dur": 4.890, + "args": { + "External id": 2945431,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011853.884, "dur": 2.930, + "args": { + "External id": 2945432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011854.679, "dur": 1.746, + "args": { + "External id": 2945433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011855.217, "dur": 1.094, + "args": { + "External id": 2945434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011860.438, "dur": 3.940, + "args": { + "External id": 2945435,"Record function id": 0, "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011861.494, "dur": 2.430, + "args": { + "External id": 2945436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011861.993, "dur": 1.385, + "args": { + "External id": 2945437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011862.588, "dur": 0.693, + "args": { + "External id": 2945438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011867.564, "dur": 3.657, + "args": { + "External id": 2945439,"Record function id": 0, "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011868.546, "dur": 2.272, + "args": { + "External id": 2945440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011869.044, "dur": 1.212, + "args": { + "External id": 2945441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011869.460, "dur": 0.716, + "args": { + "External id": 2945442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011874.329, "dur": 3.360, + "args": { + "External id": 2945443,"Record function id": 0, "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011875.292, "dur": 2.004, + "args": { + "External id": 2945444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011875.829, "dur": 1.047, + "args": { + "External id": 2945445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011876.130, "dur": 0.655, + "args": { + "External id": 2945446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011880.952, "dur": 13.109, + "args": { + "External id": 2945447,"Record function id": 0, "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011882.029, "dur": 11.628, + "args": { + "External id": 2945448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011882.521, "dur": 10.520, + "args": { + "External id": 2945449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011882.884, "dur": 10.098, + "args": { + "External id": 2945450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011897.112, "dur": 4.145, + "args": { + "External id": 2945451,"Record function id": 0, "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011898.701, "dur": 2.121, + "args": { + "External id": 2945452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011899.183, "dur": 1.251, + "args": { + "External id": 2945453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011899.448, "dur": 0.919, + "args": { + "External id": 2945454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011904.278, "dur": 5.584, + "args": { + "External id": 2945455,"Record function id": 0, "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017011905.550, "dur": 3.894, + "args": { + "External id": 2945456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011906.039, "dur": 3.005, + "args": { + "External id": 2945457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017011908.260, "dur": 0.713, + "args": { + "External id": 2945458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017011914.220, "dur": 36471.473, + "args": { + "External id": 2945459,"Record function id": 0, "Sequence number": 29294577, "Fwd thread id": 1, "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017011916.135, "dur": 36461.833, + "args": { + "External id": 2945460,"Sequence number": 29294577, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4531 + } + }, + { + "ph": "f", "id": 211, "pid": 1336755, "tid": 1381158, "ts": 1555017011916.135, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 1336755, "tid": 1381158, + "ts": 1555017011941.120, "dur": 38.456, + "args": { + "External id": 2945461,"Record function id": 0, "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 1336755, "tid": 1381158, + "ts": 1555017012029.223, "dur": 74.616, + "args": { + "External id": 2945462,"Record function id": 0, "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 1336755, "tid": 1381158, + "ts": 1555017012110.760, "dur": 36259.858, + "args": { + "External id": 2945463,"Record function id": 0, "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017012218.244, "dur": 7.016, + "args": { + "External id": 2945464,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017012235.343, "dur": 4.899, + "args": { + "External id": 2945465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017012254.713, "dur": 35238.756, + "args": { + "External id": 2945466,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017012268.141, "dur": 35216.767, + "args": { + "External id": 2945467,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017012314.802, "dur": 13.133, + "args": { + "External id": 2945468,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017012334.201, "dur": 35111.922, + "args": { + "External id": 2945469,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017012336.946, "dur": 35108.520, + "args": { + "External id": 2945470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017012340.765, "dur": 4.936, + "args": { + "External id": 2945471,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017012347.379, "dur": 35094.505, + "args": { + "External id": 2945472,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017047580.684, "dur": 8.581, + "args": { + "External id": 2945473,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017047583.472, "dur": 5.399, + "args": { + "External id": 2945474,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017047619.467, "dur": 441.470, + "args": { + "External id": 2945475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017047651.595, "dur": 404.302, + "args": { + "External id": 2945476,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4547, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017047662.399, "dur": 387.812, + "args": { + "External id": 2945477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017048084.352, "dur": 2.603, + "args": { + "External id": 2945478,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4549, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048159.276, "dur": 8.352, + "args": { + "External id": 2945479,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048211.060, "dur": 1.470, + "args": { + "External id": 2945480,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048228.686, "dur": 1.314, + "args": { + "External id": 2945481,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048241.659, "dur": 1.377, + "args": { + "External id": 2945482,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048259.164, "dur": 3.250, + "args": { + "External id": 2945483,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048273.176, "dur": 1.197, + "args": { + "External id": 2945484,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048287.003, "dur": 0.963, + "args": { + "External id": 2945485,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048299.457, "dur": 0.913, + "args": { + "External id": 2945486,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048309.744, "dur": 2.687, + "args": { + "External id": 2945487,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017048399.522, "dur": 2774.217, + "args": { + "External id": 2945488,"Record function id": 0, "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 1336755, "tid": 1381158, + "ts": 1555017048417.968, "dur": 1055.186, + "args": { + "External id": 2945489,"Record function id": 0, "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 1336755, "tid": 1381158, + "ts": 1555017048432.543, "dur": 306.867, + "args": { + "External id": 2945490,"Record function id": 0, "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048517.324, "dur": 3.833, + "args": { + "External id": 2945491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048524.251, "dur": 1.647, + "args": { + "External id": 2945492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048529.387, "dur": 0.989, + "args": { + "External id": 2945493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048531.946, "dur": 0.886, + "args": { + "External id": 2945494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048534.495, "dur": 1.118, + "args": { + "External id": 2945495,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048537.074, "dur": 0.963, + "args": { + "External id": 2945496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048541.451, "dur": 3.295, + "args": { + "External id": 2945497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048546.354, "dur": 0.957, + "args": { + "External id": 2945498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048548.805, "dur": 1.308, + "args": { + "External id": 2945499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017048551.754, "dur": 1.091, + "args": { + "External id": 2945500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017048571.191, "dur": 142.840, + "args": { + "External id": 2945501,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017048586.919, "dur": 123.022, + "args": { + "External id": 2945502,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017048603.008, "dur": 12.036, + "args": { + "External id": 2945503,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017048618.265, "dur": 63.958, + "args": { + "External id": 2945504,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017048620.557, "dur": 61.317, + "args": { + "External id": 2945505,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048624.565, "dur": 4.987, + "args": { + "External id": 2945506,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017048630.968, "dur": 50.335, + "args": { + "External id": 2945507,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 1336755, "tid": 1381158, + "ts": 1555017048832.571, "dur": 632.443, + "args": { + "External id": 2945508,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 1336755, "tid": 1381158, + "ts": 1555017048848.857, "dur": 604.444, + "args": { + "External id": 2945509,"Record function id": 0, "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017048908.751, "dur": 4.397, + "args": { + "External id": 2945510,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017048927.621, "dur": 27.209, + "args": { + "External id": 2945511,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048932.064, "dur": 2.224, + "args": { + "External id": 2945512,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048936.370, "dur": 1.938, + "args": { + "External id": 2945513,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048939.471, "dur": 0.457, + "args": { + "External id": 2945514,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048942.225, "dur": 0.453, + "args": { + "External id": 2945515,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048944.138, "dur": 0.592, + "args": { + "External id": 2945516,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048945.973, "dur": 0.629, + "args": { + "External id": 2945517,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048948.155, "dur": 0.632, + "args": { + "External id": 2945518,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048950.104, "dur": 0.254, + "args": { + "External id": 2945519,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017048951.677, "dur": 0.622, + "args": { + "External id": 2945520,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017048965.152, "dur": 75.583, + "args": { + "External id": 2945521,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017049075.373, "dur": 154.116, + "args": { + "External id": 2945522,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017049085.598, "dur": 6.075, + "args": { + "External id": 2945523,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017049097.593, "dur": 10.403, + "args": { + "External id": 2945524,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017049101.402, "dur": 6.206, + "args": { + "External id": 2945525,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049105.435, "dur": 0.610, + "args": { + "External id": 2945526,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017049116.004, "dur": 43.275, + "args": { + "External id": 2945527,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049118.468, "dur": 0.608, + "args": { + "External id": 2945528,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049120.765, "dur": 0.859, + "args": { + "External id": 2945529,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049123.074, "dur": 0.981, + "args": { + "External id": 2945530,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049125.654, "dur": 0.221, + "args": { + "External id": 2945531,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049127.344, "dur": 0.351, + "args": { + "External id": 2945532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049129.359, "dur": 0.441, + "args": { + "External id": 2945533,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049131.160, "dur": 1.977, + "args": { + "External id": 2945534,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049134.641, "dur": 0.372, + "args": { + "External id": 2945535,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017049136.365, "dur": 0.340, + "args": { + "External id": 2945536,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017049195.088, "dur": 26.413, + "args": { + "External id": 2945537,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017049275.208, "dur": 113.915, + "args": { + "External id": 2945538,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017049304.703, "dur": 80.875, + "args": { + "External id": 2945539,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4610, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017049314.071, "dur": 67.658, + "args": { + "External id": 2945540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017049404.391, "dur": 1.828, + "args": { + "External id": 2945541,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4612, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017049480.352, "dur": 1657.336, + "args": { + "External id": 2945542,"Sequence number": 29294576, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4613 + } + }, + { + "ph": "f", "id": 212, "pid": 1336755, "tid": 1381158, "ts": 1555017049480.352, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017049592.479, "dur": 103.568, + "args": { + "External id": 2945543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017049734.598, "dur": 40.812, + "args": { + "External id": 2945544,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017049791.758, "dur": 49.820, + "args": { + "External id": 2945545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017049850.362, "dur": 32.247, + "args": { + "External id": 2945546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017049893.916, "dur": 45.232, + "args": { + "External id": 2945547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017049947.872, "dur": 27.032, + "args": { + "External id": 2945548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050024.387, "dur": 48.664, + "args": { + "External id": 2945549,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017050101.037, "dur": 24.346, + "args": { + "External id": 2945550,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017050159.258, "dur": 35.361, + "args": { + "External id": 2945551,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017050218.490, "dur": 20.174, + "args": { + "External id": 2945552,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017050251.464, "dur": 15.127, + "args": { + "External id": 2945553,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050277.935, "dur": 33.344, + "args": { + "External id": 2945554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050314.659, "dur": 32.993, + "args": { + "External id": 2945555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017050375.200, "dur": 172.712, + "args": { + "External id": 2945556,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017050450.775, "dur": 6.251, + "args": { + "External id": 2945557,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017050458.733, "dur": 4.003, + "args": { + "External id": 2945558,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017050582.367, "dur": 24.493, + "args": { + "External id": 2945559,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017050618.797, "dur": 16.455, + "args": { + "External id": 2945560,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050642.594, "dur": 35.591, + "args": { + "External id": 2945561,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050683.404, "dur": 34.655, + "args": { + "External id": 2945562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050723.914, "dur": 21.786, + "args": { + "External id": 2945563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050752.427, "dur": 29.914, + "args": { + "External id": 2945564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050787.694, "dur": 24.113, + "args": { + "External id": 2945565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017050819.420, "dur": 45.585, + "args": { + "External id": 2945566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017050891.712, "dur": 28.301, + "args": { + "External id": 2945567,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017050937.263, "dur": 23.014, + "args": { + "External id": 2945568,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017050980.015, "dur": 55.393, + "args": { + "External id": 2945569,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017051056.372, "dur": 19.521, + "args": { + "External id": 2945570,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017051089.170, "dur": 19.219, + "args": { + "External id": 2945571,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051199.269, "dur": 14.173, + "args": { + "External id": 2945572,"Record function id": 0, "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051202.267, "dur": 10.308, + "args": { + "External id": 2945573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051206.260, "dur": 5.408, + "args": { + "External id": 2945574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051207.845, "dur": 3.707, + "args": { + "External id": 2945575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051217.173, "dur": 4.684, + "args": { + "External id": 2945576,"Record function id": 0, "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051218.566, "dur": 2.857, + "args": { + "External id": 2945577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051219.474, "dur": 1.403, + "args": { + "External id": 2945578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051220.007, "dur": 0.779, + "args": { + "External id": 2945579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051225.112, "dur": 4.398, + "args": { + "External id": 2945580,"Record function id": 0, "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051226.589, "dur": 2.458, + "args": { + "External id": 2945581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051227.168, "dur": 1.264, + "args": { + "External id": 2945582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051227.718, "dur": 0.597, + "args": { + "External id": 2945583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051232.675, "dur": 3.401, + "args": { + "External id": 2945584,"Record function id": 0, "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051233.535, "dur": 2.123, + "args": { + "External id": 2945585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051234.161, "dur": 0.975, + "args": { + "External id": 2945586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051234.441, "dur": 0.608, + "args": { + "External id": 2945587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051239.074, "dur": 9.560, + "args": { + "External id": 2945588,"Record function id": 0, "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051240.330, "dur": 7.802, + "args": { + "External id": 2945589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051240.899, "dur": 3.504, + "args": { + "External id": 2945590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051241.197, "dur": 3.101, + "args": { + "External id": 2945591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051251.712, "dur": 3.543, + "args": { + "External id": 2945592,"Record function id": 0, "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051252.668, "dur": 2.180, + "args": { + "External id": 2945593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051253.158, "dur": 1.281, + "args": { + "External id": 2945594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051253.532, "dur": 0.834, + "args": { + "External id": 2945595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051258.451, "dur": 5.736, + "args": { + "External id": 2945596,"Record function id": 0, "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051259.534, "dur": 4.237, + "args": { + "External id": 2945597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051260.108, "dur": 3.273, + "args": { + "External id": 2945598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051262.571, "dur": 0.685, + "args": { + "External id": 2945599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051267.290, "dur": 4.016, + "args": { + "External id": 2945600,"Record function id": 0, "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051268.499, "dur": 2.385, + "args": { + "External id": 2945601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051268.968, "dur": 1.534, + "args": { + "External id": 2945602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051269.627, "dur": 0.794, + "args": { + "External id": 2945603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051274.459, "dur": 4.149, + "args": { + "External id": 2945604,"Record function id": 0, "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017051275.652, "dur": 2.502, + "args": { + "External id": 2945605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051276.193, "dur": 1.532, + "args": { + "External id": 2945606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017051276.789, "dur": 0.871, + "args": { + "External id": 2945607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017051282.414, "dur": 37843.209, + "args": { + "External id": 2945608,"Record function id": 0, "Sequence number": 29294575, "Fwd thread id": 1, "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017051283.966, "dur": 37833.059, + "args": { + "External id": 2945609,"Sequence number": 29294575, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4680 + } + }, + { + "ph": "f", "id": 213, "pid": 1336755, "tid": 1381158, "ts": 1555017051283.966, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 1336755, "tid": 1381158, + "ts": 1555017051313.801, "dur": 38.223, + "args": { + "External id": 2945610,"Record function id": 0, "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 1336755, "tid": 1381158, + "ts": 1555017051359.420, "dur": 69.117, + "args": { + "External id": 2945611,"Record function id": 0, "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 1336755, "tid": 1381158, + "ts": 1555017051433.816, "dur": 37675.577, + "args": { + "External id": 2945612,"Record function id": 0, "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017051516.833, "dur": 6.526, + "args": { + "External id": 2945613,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017051531.869, "dur": 4.209, + "args": { + "External id": 2945614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017051550.844, "dur": 36684.076, + "args": { + "External id": 2945615,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017051563.147, "dur": 36663.649, + "args": { + "External id": 2945616,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017051604.921, "dur": 12.411, + "args": { + "External id": 2945617,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017051623.183, "dur": 36564.674, + "args": { + "External id": 2945618,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017051625.713, "dur": 36561.411, + "args": { + "External id": 2945619,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017051630.068, "dur": 6.696, + "args": { + "External id": 2945620,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017051638.439, "dur": 36545.327, + "args": { + "External id": 2945621,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017088322.070, "dur": 8.242, + "args": { + "External id": 2945622,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017088324.741, "dur": 5.252, + "args": { + "External id": 2945623,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017088359.146, "dur": 416.674, + "args": { + "External id": 2945624,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017088390.014, "dur": 380.960, + "args": { + "External id": 2945625,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4696, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017088400.743, "dur": 364.921, + "args": { + "External id": 2945626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017088797.764, "dur": 2.231, + "args": { + "External id": 2945627,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4698, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088857.140, "dur": 6.157, + "args": { + "External id": 2945628,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088903.921, "dur": 1.439, + "args": { + "External id": 2945629,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088921.568, "dur": 1.567, + "args": { + "External id": 2945630,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088935.207, "dur": 3.218, + "args": { + "External id": 2945631,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088949.462, "dur": 0.792, + "args": { + "External id": 2945632,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088960.239, "dur": 1.176, + "args": { + "External id": 2945633,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017088973.170, "dur": 0.926, + "args": { + "External id": 2945634,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089031.293, "dur": 3.445, + "args": { + "External id": 2945635,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089048.443, "dur": 0.824, + "args": { + "External id": 2945636,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017089139.768, "dur": 2707.232, + "args": { + "External id": 2945637,"Record function id": 0, "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 1336755, "tid": 1381158, + "ts": 1555017089173.776, "dur": 1035.555, + "args": { + "External id": 2945638,"Record function id": 0, "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 1336755, "tid": 1381158, + "ts": 1555017089188.715, "dur": 318.649, + "args": { + "External id": 2945639,"Record function id": 0, "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089274.270, "dur": 4.794, + "args": { + "External id": 2945640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089282.261, "dur": 1.186, + "args": { + "External id": 2945641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089286.677, "dur": 1.095, + "args": { + "External id": 2945642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089289.330, "dur": 0.976, + "args": { + "External id": 2945643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089291.830, "dur": 2.798, + "args": { + "External id": 2945644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089296.151, "dur": 1.253, + "args": { + "External id": 2945645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089300.036, "dur": 0.923, + "args": { + "External id": 2945646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089302.654, "dur": 1.396, + "args": { + "External id": 2945647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089305.531, "dur": 1.462, + "args": { + "External id": 2945648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017089308.405, "dur": 1.389, + "args": { + "External id": 2945649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017089328.934, "dur": 150.427, + "args": { + "External id": 2945650,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017089344.200, "dur": 130.981, + "args": { + "External id": 2945651,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017089364.476, "dur": 12.176, + "args": { + "External id": 2945652,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017089379.423, "dur": 68.211, + "args": { + "External id": 2945653,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017089381.961, "dur": 65.363, + "args": { + "External id": 2945654,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089388.449, "dur": 6.790, + "args": { + "External id": 2945655,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017089396.886, "dur": 49.691, + "args": { + "External id": 2945656,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 1336755, "tid": 1381158, + "ts": 1555017089594.451, "dur": 607.575, + "args": { + "External id": 2945657,"Record function id": 0, "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 1336755, "tid": 1381158, + "ts": 1555017089613.978, "dur": 575.376, + "args": { + "External id": 2945658,"Record function id": 0, "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017089674.030, "dur": 6.526, + "args": { + "External id": 2945659,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017089694.862, "dur": 28.562, + "args": { + "External id": 2945660,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089699.725, "dur": 1.538, + "args": { + "External id": 2945661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089703.713, "dur": 0.378, + "args": { + "External id": 2945662,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089705.618, "dur": 1.297, + "args": { + "External id": 2945663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089708.777, "dur": 0.638, + "args": { + "External id": 2945664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089710.636, "dur": 0.369, + "args": { + "External id": 2945665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089713.323, "dur": 0.332, + "args": { + "External id": 2945666,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089714.967, "dur": 0.441, + "args": { + "External id": 2945667,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089716.605, "dur": 2.339, + "args": { + "External id": 2945668,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089720.807, "dur": 0.327, + "args": { + "External id": 2945669,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017089733.092, "dur": 32.828, + "args": { + "External id": 2945670,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017089795.529, "dur": 111.580, + "args": { + "External id": 2945671,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017089804.522, "dur": 3.189, + "args": { + "External id": 2945672,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017089813.059, "dur": 9.809, + "args": { + "External id": 2945673,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017089817.002, "dur": 5.464, + "args": { + "External id": 2945674,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089820.735, "dur": 0.529, + "args": { + "External id": 2945675,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017089830.153, "dur": 31.066, + "args": { + "External id": 2945676,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089832.346, "dur": 0.348, + "args": { + "External id": 2945677,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089834.713, "dur": 0.355, + "args": { + "External id": 2945678,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089839.817, "dur": 0.317, + "args": { + "External id": 2945679,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089841.486, "dur": 0.586, + "args": { + "External id": 2945680,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089843.515, "dur": 2.454, + "args": { + "External id": 2945681,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089847.341, "dur": 0.357, + "args": { + "External id": 2945682,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089849.066, "dur": 0.309, + "args": { + "External id": 2945683,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089851.153, "dur": 0.416, + "args": { + "External id": 2945684,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017089852.779, "dur": 0.391, + "args": { + "External id": 2945685,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017089879.638, "dur": 20.596, + "args": { + "External id": 2945686,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017089950.960, "dur": 152.535, + "args": { + "External id": 2945687,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017089975.192, "dur": 124.612, + "args": { + "External id": 2945688,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4759, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017090024.747, "dur": 70.637, + "args": { + "External id": 2945689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017090120.020, "dur": 1.857, + "args": { + "External id": 2945690,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4761, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017090220.133, "dur": 1604.354, + "args": { + "External id": 2945691,"Sequence number": 29294574, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4762 + } + }, + { + "ph": "f", "id": 214, "pid": 1336755, "tid": 1381158, "ts": 1555017090220.133, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090334.889, "dur": 103.743, + "args": { + "External id": 2945692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017090480.753, "dur": 39.774, + "args": { + "External id": 2945693,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090535.745, "dur": 52.768, + "args": { + "External id": 2945694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090597.917, "dur": 33.546, + "args": { + "External id": 2945695,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090637.373, "dur": 44.805, + "args": { + "External id": 2945696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090690.311, "dur": 26.841, + "args": { + "External id": 2945697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090724.592, "dur": 44.991, + "args": { + "External id": 2945698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017090793.073, "dur": 28.331, + "args": { + "External id": 2945699,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017090839.902, "dur": 30.597, + "args": { + "External id": 2945700,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017090889.545, "dur": 19.763, + "args": { + "External id": 2945701,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017090925.046, "dur": 14.924, + "args": { + "External id": 2945702,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090947.788, "dur": 27.658, + "args": { + "External id": 2945703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017090978.532, "dur": 74.750, + "args": { + "External id": 2945704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017091089.082, "dur": 183.211, + "args": { + "External id": 2945705,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017091177.944, "dur": 8.862, + "args": { + "External id": 2945706,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017091188.894, "dur": 2.758, + "args": { + "External id": 2945707,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017091309.157, "dur": 24.920, + "args": { + "External id": 2945708,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017091345.438, "dur": 16.368, + "args": { + "External id": 2945709,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017091369.033, "dur": 46.511, + "args": { + "External id": 2945710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017091420.816, "dur": 35.646, + "args": { + "External id": 2945711,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017091462.800, "dur": 21.445, + "args": { + "External id": 2945712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017091490.710, "dur": 29.722, + "args": { + "External id": 2945713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017091526.161, "dur": 21.315, + "args": { + "External id": 2945714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017091554.128, "dur": 33.540, + "args": { + "External id": 2945715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017091606.489, "dur": 39.803, + "args": { + "External id": 2945716,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017091670.709, "dur": 30.775, + "args": { + "External id": 2945717,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017091716.259, "dur": 17.866, + "args": { + "External id": 2945718,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017091752.827, "dur": 13.924, + "args": { + "External id": 2945719,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017091779.408, "dur": 16.641, + "args": { + "External id": 2945720,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091870.822, "dur": 13.699, + "args": { + "External id": 2945721,"Record function id": 0, "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091873.944, "dur": 9.623, + "args": { + "External id": 2945722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091877.530, "dur": 5.144, + "args": { + "External id": 2945723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091878.887, "dur": 3.694, + "args": { + "External id": 2945724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091888.397, "dur": 5.755, + "args": { + "External id": 2945725,"Record function id": 0, "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091890.163, "dur": 3.537, + "args": { + "External id": 2945726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091891.288, "dur": 1.964, + "args": { + "External id": 2945727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091892.181, "dur": 0.978, + "args": { + "External id": 2945728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091897.419, "dur": 4.506, + "args": { + "External id": 2945729,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091898.581, "dur": 2.928, + "args": { + "External id": 2945730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091899.533, "dur": 1.441, + "args": { + "External id": 2945731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091900.063, "dur": 0.822, + "args": { + "External id": 2945732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091905.130, "dur": 4.271, + "args": { + "External id": 2945733,"Record function id": 0, "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091906.465, "dur": 2.540, + "args": { + "External id": 2945734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091907.348, "dur": 1.258, + "args": { + "External id": 2945735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091907.774, "dur": 0.760, + "args": { + "External id": 2945736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091912.498, "dur": 8.418, + "args": { + "External id": 2945737,"Record function id": 0, "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091913.541, "dur": 6.962, + "args": { + "External id": 2945738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091914.252, "dur": 5.810, + "args": { + "External id": 2945739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091914.539, "dur": 5.412, + "args": { + "External id": 2945740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091924.145, "dur": 5.567, + "args": { + "External id": 2945741,"Record function id": 0, "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091925.224, "dur": 4.083, + "args": { + "External id": 2945742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091926.134, "dur": 2.542, + "args": { + "External id": 2945743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091927.763, "dur": 0.839, + "args": { + "External id": 2945744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091932.941, "dur": 4.282, + "args": { + "External id": 2945745,"Record function id": 0, "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091934.201, "dur": 2.607, + "args": { + "External id": 2945746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091935.173, "dur": 1.116, + "args": { + "External id": 2945747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091935.465, "dur": 0.749, + "args": { + "External id": 2945748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091940.365, "dur": 4.594, + "args": { + "External id": 2945749,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091941.477, "dur": 3.078, + "args": { + "External id": 2945750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091942.423, "dur": 1.606, + "args": { + "External id": 2945751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091943.074, "dur": 0.882, + "args": { + "External id": 2945752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091948.037, "dur": 5.382, + "args": { + "External id": 2945753,"Record function id": 0, "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017091950.019, "dur": 2.981, + "args": { + "External id": 2945754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091951.089, "dur": 1.490, + "args": { + "External id": 2945755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017091951.628, "dur": 0.878, + "args": { + "External id": 2945756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017091957.125, "dur": 36685.195, + "args": { + "External id": 2945757,"Record function id": 0, "Sequence number": 29294573, "Fwd thread id": 1, "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017091958.454, "dur": 36675.852, + "args": { + "External id": 2945758,"Sequence number": 29294573, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4829 + } + }, + { + "ph": "f", "id": 215, "pid": 1336755, "tid": 1381158, "ts": 1555017091958.454, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 1336755, "tid": 1381158, + "ts": 1555017092030.317, "dur": 38.739, + "args": { + "External id": 2945759,"Record function id": 0, "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 1336755, "tid": 1381158, + "ts": 1555017092076.949, "dur": 83.461, + "args": { + "External id": 2945760,"Record function id": 0, "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 1336755, "tid": 1381158, + "ts": 1555017092168.120, "dur": 36458.741, + "args": { + "External id": 2945761,"Record function id": 0, "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017092263.695, "dur": 7.136, + "args": { + "External id": 2945762,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017092280.549, "dur": 4.922, + "args": { + "External id": 2945763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017092297.903, "dur": 35519.967, + "args": { + "External id": 2945764,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017092310.470, "dur": 35498.876, + "args": { + "External id": 2945765,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017092354.318, "dur": 16.273, + "args": { + "External id": 2945766,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017092376.289, "dur": 35395.024, + "args": { + "External id": 2945767,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017092379.782, "dur": 35390.862, + "args": { + "External id": 2945768,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017092383.738, "dur": 6.991, + "args": { + "External id": 2945769,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017092392.359, "dur": 35374.857, + "args": { + "External id": 2945770,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017127900.830, "dur": 8.239, + "args": { + "External id": 2945771,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017127903.343, "dur": 5.347, + "args": { + "External id": 2945772,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017127937.157, "dur": 405.534, + "args": { + "External id": 2945773,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017127964.293, "dur": 373.565, + "args": { + "External id": 2945774,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4845, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017127974.944, "dur": 357.463, + "args": { + "External id": 2945775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017128364.765, "dur": 2.392, + "args": { + "External id": 2945776,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4847, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128423.441, "dur": 6.652, + "args": { + "External id": 2945777,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128471.283, "dur": 1.419, + "args": { + "External id": 2945778,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128488.332, "dur": 1.382, + "args": { + "External id": 2945779,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128501.183, "dur": 3.492, + "args": { + "External id": 2945780,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128516.954, "dur": 1.123, + "args": { + "External id": 2945781,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128528.111, "dur": 0.757, + "args": { + "External id": 2945782,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128541.600, "dur": 1.111, + "args": { + "External id": 2945783,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128554.886, "dur": 3.382, + "args": { + "External id": 2945784,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128568.663, "dur": 0.835, + "args": { + "External id": 2945785,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017128655.962, "dur": 2752.324, + "args": { + "External id": 2945786,"Record function id": 0, "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 1336755, "tid": 1381158, + "ts": 1555017128674.114, "dur": 1011.270, + "args": { + "External id": 2945787,"Record function id": 0, "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 1336755, "tid": 1381158, + "ts": 1555017128689.803, "dur": 340.436, + "args": { + "External id": 2945788,"Record function id": 0, "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128770.664, "dur": 3.887, + "args": { + "External id": 2945789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128777.586, "dur": 1.193, + "args": { + "External id": 2945790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128782.190, "dur": 0.933, + "args": { + "External id": 2945791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128784.504, "dur": 1.027, + "args": { + "External id": 2945792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128787.097, "dur": 1.080, + "args": { + "External id": 2945793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128789.805, "dur": 2.674, + "args": { + "External id": 2945794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128795.685, "dur": 1.073, + "args": { + "External id": 2945795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128798.037, "dur": 1.042, + "args": { + "External id": 2945796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128800.604, "dur": 1.163, + "args": { + "External id": 2945797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017128803.128, "dur": 1.595, + "args": { + "External id": 2945798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017128823.373, "dur": 140.597, + "args": { + "External id": 2945799,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017128838.181, "dur": 121.840, + "args": { + "External id": 2945800,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017128855.980, "dur": 11.290, + "args": { + "External id": 2945801,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017128869.983, "dur": 62.992, + "args": { + "External id": 2945802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017128872.516, "dur": 60.027, + "args": { + "External id": 2945803,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017128876.611, "dur": 6.231, + "args": { + "External id": 2945804,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017128884.242, "dur": 47.477, + "args": { + "External id": 2945805,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 1336755, "tid": 1381158, + "ts": 1555017129123.214, "dur": 555.009, + "args": { + "External id": 2945806,"Record function id": 0, "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 1336755, "tid": 1381158, + "ts": 1555017129157.122, "dur": 510.423, + "args": { + "External id": 2945807,"Record function id": 0, "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017129224.323, "dur": 6.075, + "args": { + "External id": 2945808,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017129245.317, "dur": 31.303, + "args": { + "External id": 2945809,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129250.137, "dur": 4.704, + "args": { + "External id": 2945810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129256.873, "dur": 0.538, + "args": { + "External id": 2945811,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129258.796, "dur": 0.273, + "args": { + "External id": 2945812,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129261.667, "dur": 0.296, + "args": { + "External id": 2945813,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129263.181, "dur": 0.514, + "args": { + "External id": 2945814,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129264.969, "dur": 0.508, + "args": { + "External id": 2945815,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129267.890, "dur": 0.349, + "args": { + "External id": 2945816,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129269.546, "dur": 0.303, + "args": { + "External id": 2945817,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129271.016, "dur": 2.526, + "args": { + "External id": 2945818,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017129287.242, "dur": 35.754, + "args": { + "External id": 2945819,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017129353.283, "dur": 103.519, + "args": { + "External id": 2945820,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017129362.794, "dur": 3.236, + "args": { + "External id": 2945821,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017129371.667, "dur": 9.765, + "args": { + "External id": 2945822,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017129375.916, "dur": 5.125, + "args": { + "External id": 2945823,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129379.169, "dur": 0.682, + "args": { + "External id": 2945824,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017129388.951, "dur": 28.124, + "args": { + "External id": 2945825,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129391.818, "dur": 0.617, + "args": { + "External id": 2945826,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129394.160, "dur": 0.375, + "args": { + "External id": 2945827,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129396.094, "dur": 0.853, + "args": { + "External id": 2945828,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129398.475, "dur": 0.316, + "args": { + "External id": 2945829,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129400.289, "dur": 0.361, + "args": { + "External id": 2945830,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129402.349, "dur": 2.543, + "args": { + "External id": 2945831,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129406.065, "dur": 0.349, + "args": { + "External id": 2945832,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129407.716, "dur": 0.481, + "args": { + "External id": 2945833,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017129409.501, "dur": 0.432, + "args": { + "External id": 2945834,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017129428.593, "dur": 21.314, + "args": { + "External id": 2945835,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017129498.311, "dur": 108.482, + "args": { + "External id": 2945836,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017129525.110, "dur": 78.586, + "args": { + "External id": 2945837,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4908, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017129533.750, "dur": 66.059, + "args": { + "External id": 2945838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017129620.210, "dur": 1.820, + "args": { + "External id": 2945839,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4910, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017129692.473, "dur": 1694.461, + "args": { + "External id": 2945840,"Sequence number": 29294572, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4911 + } + }, + { + "ph": "f", "id": 216, "pid": 1336755, "tid": 1381158, "ts": 1555017129692.473, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017129798.900, "dur": 108.771, + "args": { + "External id": 2945841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017129947.745, "dur": 82.020, + "args": { + "External id": 2945842,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130049.641, "dur": 57.332, + "args": { + "External id": 2945843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130119.052, "dur": 48.557, + "args": { + "External id": 2945844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130180.064, "dur": 49.322, + "args": { + "External id": 2945845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130241.270, "dur": 28.961, + "args": { + "External id": 2945846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130278.015, "dur": 42.426, + "args": { + "External id": 2945847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017130345.716, "dur": 25.206, + "args": { + "External id": 2945848,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017130388.874, "dur": 32.321, + "args": { + "External id": 2945849,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017130440.421, "dur": 20.462, + "args": { + "External id": 2945850,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017130477.161, "dur": 14.936, + "args": { + "External id": 2945851,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130500.356, "dur": 30.154, + "args": { + "External id": 2945852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130533.924, "dur": 34.028, + "args": { + "External id": 2945853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017130595.834, "dur": 169.743, + "args": { + "External id": 2945854,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017130671.240, "dur": 7.731, + "args": { + "External id": 2945855,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017130680.623, "dur": 2.890, + "args": { + "External id": 2945856,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017130802.030, "dur": 24.482, + "args": { + "External id": 2945857,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017130845.999, "dur": 17.254, + "args": { + "External id": 2945858,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130870.470, "dur": 34.986, + "args": { + "External id": 2945859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130911.227, "dur": 34.399, + "args": { + "External id": 2945860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017130952.978, "dur": 21.678, + "args": { + "External id": 2945861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017131017.993, "dur": 38.392, + "args": { + "External id": 2945862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017131065.473, "dur": 20.997, + "args": { + "External id": 2945863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017131092.908, "dur": 30.031, + "args": { + "External id": 2945864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017131158.724, "dur": 25.810, + "args": { + "External id": 2945865,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017131219.821, "dur": 33.277, + "args": { + "External id": 2945866,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017131274.193, "dur": 18.526, + "args": { + "External id": 2945867,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017131310.359, "dur": 14.822, + "args": { + "External id": 2945868,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017131338.576, "dur": 16.815, + "args": { + "External id": 2945869,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131432.815, "dur": 14.086, + "args": { + "External id": 2945870,"Record function id": 0, "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131435.770, "dur": 10.242, + "args": { + "External id": 2945871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131439.862, "dur": 5.234, + "args": { + "External id": 2945872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131441.241, "dur": 3.761, + "args": { + "External id": 2945873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131450.800, "dur": 5.336, + "args": { + "External id": 2945874,"Record function id": 0, "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131452.160, "dur": 3.451, + "args": { + "External id": 2945875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131453.051, "dur": 1.996, + "args": { + "External id": 2945876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131453.955, "dur": 1.000, + "args": { + "External id": 2945877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131459.344, "dur": 4.770, + "args": { + "External id": 2945878,"Record function id": 0, "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131460.667, "dur": 3.024, + "args": { + "External id": 2945879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131461.599, "dur": 1.562, + "args": { + "External id": 2945880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131462.330, "dur": 0.747, + "args": { + "External id": 2945881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131467.379, "dur": 3.962, + "args": { + "External id": 2945882,"Record function id": 0, "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131468.639, "dur": 2.277, + "args": { + "External id": 2945883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131469.298, "dur": 1.202, + "args": { + "External id": 2945884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131469.578, "dur": 0.847, + "args": { + "External id": 2945885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131474.409, "dur": 5.276, + "args": { + "External id": 2945886,"Record function id": 0, "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131475.538, "dur": 3.698, + "args": { + "External id": 2945887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131476.065, "dur": 2.742, + "args": { + "External id": 2945888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131476.362, "dur": 2.330, + "args": { + "External id": 2945889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131482.836, "dur": 3.758, + "args": { + "External id": 2945890,"Record function id": 0, "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131483.876, "dur": 2.297, + "args": { + "External id": 2945891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131484.379, "dur": 1.254, + "args": { + "External id": 2945892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131484.749, "dur": 0.813, + "args": { + "External id": 2945893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131493.088, "dur": 5.301, + "args": { + "External id": 2945894,"Record function id": 0, "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131494.080, "dur": 3.906, + "args": { + "External id": 2945895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131494.660, "dur": 2.773, + "args": { + "External id": 2945896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131496.635, "dur": 0.725, + "args": { + "External id": 2945897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131501.529, "dur": 3.733, + "args": { + "External id": 2945898,"Record function id": 0, "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131502.532, "dur": 2.324, + "args": { + "External id": 2945899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131503.400, "dur": 0.997, + "args": { + "External id": 2945900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131503.792, "dur": 0.532, + "args": { + "External id": 2945901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131508.317, "dur": 4.014, + "args": { + "External id": 2945902,"Record function id": 0, "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017131509.644, "dur": 2.294, + "args": { + "External id": 2945903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131510.211, "dur": 1.140, + "args": { + "External id": 2945904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017131510.620, "dur": 0.657, + "args": { + "External id": 2945905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017131516.033, "dur": 37251.855, + "args": { + "External id": 2945906,"Record function id": 0, "Sequence number": 29294571, "Fwd thread id": 1, "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017131517.157, "dur": 37242.084, + "args": { + "External id": 2945907,"Sequence number": 29294571, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4978 + } + }, + { + "ph": "f", "id": 217, "pid": 1336755, "tid": 1381158, "ts": 1555017131517.157, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 1336755, "tid": 1381158, + "ts": 1555017131543.656, "dur": 38.852, + "args": { + "External id": 2945908,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 1336755, "tid": 1381158, + "ts": 1555017131589.488, "dur": 64.387, + "args": { + "External id": 2945909,"Record function id": 0, "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 1336755, "tid": 1381158, + "ts": 1555017131660.755, "dur": 37091.034, + "args": { + "External id": 2945910,"Record function id": 0, "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017131743.596, "dur": 5.678, + "args": { + "External id": 2945911,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017131758.132, "dur": 4.457, + "args": { + "External id": 2945912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017131780.151, "dur": 36170.759, + "args": { + "External id": 2945913,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017131792.951, "dur": 36148.933, + "args": { + "External id": 2945914,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017131839.844, "dur": 12.782, + "args": { + "External id": 2945915,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017131858.469, "dur": 36042.728, + "args": { + "External id": 2945916,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017131861.083, "dur": 36039.307, + "args": { + "External id": 2945917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017131865.177, "dur": 5.844, + "args": { + "External id": 2945918,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017131872.706, "dur": 36024.088, + "args": { + "External id": 2945919,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017168066.685, "dur": 9.127, + "args": { + "External id": 2945920,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017168069.382, "dur": 5.903, + "args": { + "External id": 2945921,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017168103.769, "dur": 370.762, + "args": { + "External id": 2945922,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017168133.105, "dur": 336.452, + "args": { + "External id": 2945923,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4994, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017168155.803, "dur": 308.034, + "args": { + "External id": 2945924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017168496.178, "dur": 2.274, + "args": { + "External id": 2945925,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4996, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168555.347, "dur": 6.579, + "args": { + "External id": 2945926,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168603.116, "dur": 1.540, + "args": { + "External id": 2945927,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168619.402, "dur": 1.612, + "args": { + "External id": 2945928,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168632.336, "dur": 2.489, + "args": { + "External id": 2945929,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168645.488, "dur": 0.913, + "args": { + "External id": 2945930,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168657.036, "dur": 1.022, + "args": { + "External id": 2945931,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168669.993, "dur": 1.219, + "args": { + "External id": 2945932,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168681.394, "dur": 3.668, + "args": { + "External id": 2945933,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017168694.618, "dur": 0.686, + "args": { + "External id": 2945934,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017168781.220, "dur": 2738.525, + "args": { + "External id": 2945935,"Record function id": 0, "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 1336755, "tid": 1381158, + "ts": 1555017168799.359, "dur": 999.806, + "args": { + "External id": 2945936,"Record function id": 0, "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 1336755, "tid": 1381158, + "ts": 1555017168814.205, "dur": 359.152, + "args": { + "External id": 2945937,"Record function id": 0, "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168894.425, "dur": 3.609, + "args": { + "External id": 2945938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168900.886, "dur": 1.111, + "args": { + "External id": 2945939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168905.190, "dur": 1.011, + "args": { + "External id": 2945940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168907.685, "dur": 0.962, + "args": { + "External id": 2945941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168910.199, "dur": 0.910, + "args": { + "External id": 2945942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168912.592, "dur": 2.978, + "args": { + "External id": 2945943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168918.030, "dur": 0.860, + "args": { + "External id": 2945944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168920.325, "dur": 0.819, + "args": { + "External id": 2945945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168922.557, "dur": 1.287, + "args": { + "External id": 2945946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017168925.412, "dur": 1.183, + "args": { + "External id": 2945947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017168945.155, "dur": 181.444, + "args": { + "External id": 2945948,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017168960.212, "dur": 161.828, + "args": { + "External id": 2945949,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017168976.804, "dur": 48.171, + "args": { + "External id": 2945950,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017169029.226, "dur": 65.407, + "args": { + "External id": 2945951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017169031.690, "dur": 62.647, + "args": { + "External id": 2945952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169035.473, "dur": 6.744, + "args": { + "External id": 2945953,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017169043.889, "dur": 49.943, + "args": { + "External id": 2945954,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 1336755, "tid": 1381158, + "ts": 1555017169268.509, "dur": 523.924, + "args": { + "External id": 2945955,"Record function id": 0, "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 1336755, "tid": 1381158, + "ts": 1555017169288.131, "dur": 492.851, + "args": { + "External id": 2945956,"Record function id": 0, "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017169353.458, "dur": 5.417, + "args": { + "External id": 2945957,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017169373.972, "dur": 28.160, + "args": { + "External id": 2945958,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169378.390, "dur": 3.408, + "args": { + "External id": 2945959,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169383.968, "dur": 0.403, + "args": { + "External id": 2945960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169385.480, "dur": 0.464, + "args": { + "External id": 2945961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169387.591, "dur": 1.054, + "args": { + "External id": 2945962,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169389.912, "dur": 0.588, + "args": { + "External id": 2945963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169391.697, "dur": 0.547, + "args": { + "External id": 2945964,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169394.234, "dur": 0.357, + "args": { + "External id": 2945965,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169395.970, "dur": 0.251, + "args": { + "External id": 2945966,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169397.489, "dur": 1.844, + "args": { + "External id": 2945967,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017169412.893, "dur": 35.378, + "args": { + "External id": 2945968,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017169477.815, "dur": 96.515, + "args": { + "External id": 2945969,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017169487.406, "dur": 3.003, + "args": { + "External id": 2945970,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017169495.935, "dur": 9.774, + "args": { + "External id": 2945971,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017169499.779, "dur": 5.522, + "args": { + "External id": 2945972,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169502.941, "dur": 1.083, + "args": { + "External id": 2945973,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017169512.728, "dur": 23.421, + "args": { + "External id": 2945974,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169514.447, "dur": 0.748, + "args": { + "External id": 2945975,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169516.197, "dur": 0.930, + "args": { + "External id": 2945976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169518.269, "dur": 0.435, + "args": { + "External id": 2945977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169520.120, "dur": 0.523, + "args": { + "External id": 2945978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169521.541, "dur": 0.687, + "args": { + "External id": 2945979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169523.256, "dur": 2.351, + "args": { + "External id": 2945980,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169526.723, "dur": 0.281, + "args": { + "External id": 2945981,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169527.671, "dur": 0.651, + "args": { + "External id": 2945982,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017169529.209, "dur": 0.329, + "args": { + "External id": 2945983,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017169547.816, "dur": 19.746, + "args": { + "External id": 2945984,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017169614.180, "dur": 108.261, + "args": { + "External id": 2945985,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017169639.037, "dur": 80.187, + "args": { + "External id": 2945986,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5057, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017169647.867, "dur": 67.165, + "args": { + "External id": 2945987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017169734.760, "dur": 1.701, + "args": { + "External id": 2945988,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5059, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017169806.073, "dur": 1688.880, + "args": { + "External id": 2945989,"Sequence number": 29294570, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5060 + } + }, + { + "ph": "f", "id": 218, "pid": 1336755, "tid": 1381158, "ts": 1555017169806.073, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017169913.757, "dur": 145.376, + "args": { + "External id": 2945990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017170104.717, "dur": 55.808, + "args": { + "External id": 2945991,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170180.272, "dur": 58.664, + "args": { + "External id": 2945992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170248.851, "dur": 32.917, + "args": { + "External id": 2945993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170288.314, "dur": 45.519, + "args": { + "External id": 2945994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170342.072, "dur": 26.519, + "args": { + "External id": 2945995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170375.306, "dur": 45.706, + "args": { + "External id": 2945996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017170448.236, "dur": 27.811, + "args": { + "External id": 2945997,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017170494.166, "dur": 33.115, + "args": { + "External id": 2945998,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017170546.862, "dur": 23.327, + "args": { + "External id": 2945999,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017170583.736, "dur": 15.300, + "args": { + "External id": 2946000,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170608.523, "dur": 32.718, + "args": { + "External id": 2946001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170644.381, "dur": 33.910, + "args": { + "External id": 2946002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017170707.850, "dur": 163.908, + "args": { + "External id": 2946003,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017170780.305, "dur": 5.945, + "args": { + "External id": 2946004,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017170788.290, "dur": 4.538, + "args": { + "External id": 2946005,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017170905.442, "dur": 25.261, + "args": { + "External id": 2946006,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017170942.810, "dur": 16.313, + "args": { + "External id": 2946007,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017170966.178, "dur": 76.395, + "args": { + "External id": 2946008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017171050.758, "dur": 42.938, + "args": { + "External id": 2946009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017171101.023, "dur": 22.985, + "args": { + "External id": 2946010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017171134.060, "dur": 46.679, + "args": { + "External id": 2946011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017171189.373, "dur": 22.010, + "args": { + "External id": 2946012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017171218.351, "dur": 29.915, + "args": { + "External id": 2946013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017171270.855, "dur": 35.346, + "args": { + "External id": 2946014,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017171331.760, "dur": 33.509, + "args": { + "External id": 2946015,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017171386.332, "dur": 21.486, + "args": { + "External id": 2946016,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017171422.472, "dur": 16.138, + "args": { + "External id": 2946017,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017171451.096, "dur": 16.536, + "args": { + "External id": 2946018,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171540.791, "dur": 13.798, + "args": { + "External id": 2946019,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171544.026, "dur": 9.677, + "args": { + "External id": 2946020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171547.804, "dur": 4.937, + "args": { + "External id": 2946021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171549.117, "dur": 3.536, + "args": { + "External id": 2946022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171558.306, "dur": 5.249, + "args": { + "External id": 2946023,"Record function id": 0, "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171560.169, "dur": 2.851, + "args": { + "External id": 2946024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171561.201, "dur": 1.283, + "args": { + "External id": 2946025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171561.638, "dur": 0.773, + "args": { + "External id": 2946026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171566.929, "dur": 4.484, + "args": { + "External id": 2946027,"Record function id": 0, "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171568.642, "dur": 2.371, + "args": { + "External id": 2946028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171569.280, "dur": 1.332, + "args": { + "External id": 2946029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171569.721, "dur": 0.816, + "args": { + "External id": 2946030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171577.708, "dur": 3.931, + "args": { + "External id": 2946031,"Record function id": 0, "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171578.811, "dur": 2.381, + "args": { + "External id": 2946032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171579.614, "dur": 1.076, + "args": { + "External id": 2946033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171579.908, "dur": 0.668, + "args": { + "External id": 2946034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171584.680, "dur": 5.824, + "args": { + "External id": 2946035,"Record function id": 0, "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171585.738, "dur": 4.326, + "args": { + "External id": 2946036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171586.412, "dur": 3.063, + "args": { + "External id": 2946037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171586.747, "dur": 2.621, + "args": { + "External id": 2946038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171593.553, "dur": 5.779, + "args": { + "External id": 2946039,"Record function id": 0, "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171594.871, "dur": 3.974, + "args": { + "External id": 2946040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171595.419, "dur": 2.890, + "args": { + "External id": 2946041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171597.307, "dur": 0.940, + "args": { + "External id": 2946042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171602.507, "dur": 3.717, + "args": { + "External id": 2946043,"Record function id": 0, "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171603.640, "dur": 2.167, + "args": { + "External id": 2946044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171604.140, "dur": 1.079, + "args": { + "External id": 2946045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171604.443, "dur": 0.661, + "args": { + "External id": 2946046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171609.233, "dur": 27.005, + "args": { + "External id": 2946047,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171610.265, "dur": 25.556, + "args": { + "External id": 2946048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171634.023, "dur": 1.158, + "args": { + "External id": 2946049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171634.485, "dur": 0.628, + "args": { + "External id": 2946050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171639.288, "dur": 6.616, + "args": { + "External id": 2946051,"Record function id": 0, "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017171641.388, "dur": 4.010, + "args": { + "External id": 2946052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171642.146, "dur": 2.647, + "args": { + "External id": 2946053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017171644.138, "dur": 0.541, + "args": { + "External id": 2946054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017171649.833, "dur": 40442.780, + "args": { + "External id": 2946055,"Record function id": 0, "Sequence number": 29294569, "Fwd thread id": 1, "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017171651.208, "dur": 40432.948, + "args": { + "External id": 2946056,"Sequence number": 29294569, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5127 + } + }, + { + "ph": "f", "id": 219, "pid": 1336755, "tid": 1381158, "ts": 1555017171651.208, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 1336755, "tid": 1381158, + "ts": 1555017171678.755, "dur": 36.805, + "args": { + "External id": 2946057,"Record function id": 0, "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 1336755, "tid": 1381158, + "ts": 1555017171722.792, "dur": 63.917, + "args": { + "External id": 2946058,"Record function id": 0, "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 1336755, "tid": 1381158, + "ts": 1555017171792.310, "dur": 40283.331, + "args": { + "External id": 2946059,"Record function id": 0, "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017171878.770, "dur": 6.276, + "args": { + "External id": 2946060,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017171893.648, "dur": 4.133, + "args": { + "External id": 2946061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017171910.500, "dur": 39191.014, + "args": { + "External id": 2946062,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017171922.762, "dur": 39170.319, + "args": { + "External id": 2946063,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017171964.173, "dur": 12.744, + "args": { + "External id": 2946064,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017172038.746, "dur": 39016.349, + "args": { + "External id": 2946065,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017172041.916, "dur": 39012.648, + "args": { + "External id": 2946066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017172046.130, "dur": 8.750, + "args": { + "External id": 2946067,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017172056.953, "dur": 38994.223, + "args": { + "External id": 2946068,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017211198.419, "dur": 8.208, + "args": { + "External id": 2946069,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017211201.066, "dur": 5.130, + "args": { + "External id": 2946070,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017211233.250, "dur": 508.389, + "args": { + "External id": 2946071,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017211259.303, "dur": 476.792, + "args": { + "External id": 2946072,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5143, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017211270.353, "dur": 459.963, + "args": { + "External id": 2946073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017211761.937, "dur": 2.549, + "args": { + "External id": 2946074,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5145, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211824.085, "dur": 6.564, + "args": { + "External id": 2946075,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211871.598, "dur": 1.560, + "args": { + "External id": 2946076,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211888.409, "dur": 1.353, + "args": { + "External id": 2946077,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211901.869, "dur": 3.426, + "args": { + "External id": 2946078,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211917.531, "dur": 0.946, + "args": { + "External id": 2946079,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211928.515, "dur": 0.962, + "args": { + "External id": 2946080,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211941.497, "dur": 1.078, + "args": { + "External id": 2946081,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211953.254, "dur": 3.020, + "args": { + "External id": 2946082,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017211970.007, "dur": 0.907, + "args": { + "External id": 2946083,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017212107.511, "dur": 2689.762, + "args": { + "External id": 2946084,"Record function id": 0, "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 1336755, "tid": 1381158, + "ts": 1555017212126.176, "dur": 1035.738, + "args": { + "External id": 2946085,"Record function id": 0, "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 1336755, "tid": 1381158, + "ts": 1555017212153.701, "dur": 320.579, + "args": { + "External id": 2946086,"Record function id": 0, "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212242.296, "dur": 4.441, + "args": { + "External id": 2946087,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212249.660, "dur": 1.150, + "args": { + "External id": 2946088,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212253.955, "dur": 1.096, + "args": { + "External id": 2946089,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212256.601, "dur": 1.025, + "args": { + "External id": 2946090,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212258.798, "dur": 0.887, + "args": { + "External id": 2946091,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212260.848, "dur": 3.232, + "args": { + "External id": 2946092,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212266.705, "dur": 1.069, + "args": { + "External id": 2946093,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212269.213, "dur": 1.192, + "args": { + "External id": 2946094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212271.726, "dur": 0.918, + "args": { + "External id": 2946095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017212274.076, "dur": 1.195, + "args": { + "External id": 2946096,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017212294.317, "dur": 151.873, + "args": { + "External id": 2946097,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017212309.897, "dur": 132.090, + "args": { + "External id": 2946098,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017212327.220, "dur": 12.339, + "args": { + "External id": 2946099,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017212342.494, "dur": 67.952, + "args": { + "External id": 2946100,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017212344.883, "dur": 65.293, + "args": { + "External id": 2946101,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212348.417, "dur": 5.887, + "args": { + "External id": 2946102,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017212355.860, "dur": 53.793, + "args": { + "External id": 2946103,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 1336755, "tid": 1381158, + "ts": 1555017212568.417, "dur": 567.933, + "args": { + "External id": 2946104,"Record function id": 0, "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 1336755, "tid": 1381158, + "ts": 1555017212584.359, "dur": 539.385, + "args": { + "External id": 2946105,"Record function id": 0, "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017212644.781, "dur": 4.546, + "args": { + "External id": 2946106,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017212666.398, "dur": 26.451, + "args": { + "External id": 2946107,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212670.861, "dur": 4.206, + "args": { + "External id": 2946108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212676.531, "dur": 0.604, + "args": { + "External id": 2946109,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212678.027, "dur": 0.561, + "args": { + "External id": 2946110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212680.278, "dur": 0.242, + "args": { + "External id": 2946111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212681.705, "dur": 0.473, + "args": { + "External id": 2946112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212682.990, "dur": 0.523, + "args": { + "External id": 2946113,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212684.831, "dur": 0.585, + "args": { + "External id": 2946114,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212686.496, "dur": 0.458, + "args": { + "External id": 2946115,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212687.657, "dur": 2.865, + "args": { + "External id": 2946116,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017212703.015, "dur": 32.240, + "args": { + "External id": 2946117,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017212763.717, "dur": 98.115, + "args": { + "External id": 2946118,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017212772.674, "dur": 3.149, + "args": { + "External id": 2946119,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017212780.615, "dur": 9.011, + "args": { + "External id": 2946120,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017212784.406, "dur": 4.780, + "args": { + "External id": 2946121,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212787.485, "dur": 0.628, + "args": { + "External id": 2946122,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017212795.643, "dur": 25.123, + "args": { + "External id": 2946123,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212800.193, "dur": 0.570, + "args": { + "External id": 2946124,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212801.993, "dur": 0.811, + "args": { + "External id": 2946125,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212803.907, "dur": 1.024, + "args": { + "External id": 2946126,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212805.813, "dur": 0.588, + "args": { + "External id": 2946127,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212807.329, "dur": 0.499, + "args": { + "External id": 2946128,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212808.964, "dur": 2.077, + "args": { + "External id": 2946129,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212811.842, "dur": 0.429, + "args": { + "External id": 2946130,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212812.978, "dur": 0.567, + "args": { + "External id": 2946131,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017212814.672, "dur": 0.558, + "args": { + "External id": 2946132,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017212831.409, "dur": 20.795, + "args": { + "External id": 2946133,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017212902.046, "dur": 150.343, + "args": { + "External id": 2946134,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017212927.204, "dur": 121.425, + "args": { + "External id": 2946135,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5206, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017212936.334, "dur": 107.726, + "args": { + "External id": 2946136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017213070.840, "dur": 2.208, + "args": { + "External id": 2946137,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5208, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017213170.884, "dur": 1602.903, + "args": { + "External id": 2946138,"Sequence number": 29294568, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5209 + } + }, + { + "ph": "f", "id": 220, "pid": 1336755, "tid": 1381158, "ts": 1555017213170.884, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213280.885, "dur": 104.583, + "args": { + "External id": 2946139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017213427.644, "dur": 43.200, + "args": { + "External id": 2946140,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213486.534, "dur": 49.073, + "args": { + "External id": 2946141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213545.112, "dur": 30.915, + "args": { + "External id": 2946142,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213582.257, "dur": 45.273, + "args": { + "External id": 2946143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213635.859, "dur": 27.187, + "args": { + "External id": 2946144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213670.026, "dur": 42.577, + "args": { + "External id": 2946145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017213734.744, "dur": 25.231, + "args": { + "External id": 2946146,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017213778.932, "dur": 31.907, + "args": { + "External id": 2946147,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017213830.559, "dur": 19.207, + "args": { + "External id": 2946148,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017213862.467, "dur": 14.585, + "args": { + "External id": 2946149,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213887.714, "dur": 30.098, + "args": { + "External id": 2946150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017213920.743, "dur": 33.336, + "args": { + "External id": 2946151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017213979.040, "dur": 237.528, + "args": { + "External id": 2946152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017214098.110, "dur": 10.813, + "args": { + "External id": 2946153,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017214111.054, "dur": 2.942, + "args": { + "External id": 2946154,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017214255.335, "dur": 30.091, + "args": { + "External id": 2946155,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017214296.257, "dur": 16.050, + "args": { + "External id": 2946156,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017214322.431, "dur": 46.081, + "args": { + "External id": 2946157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017214374.321, "dur": 38.822, + "args": { + "External id": 2946158,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017214419.918, "dur": 21.500, + "args": { + "External id": 2946159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017214445.606, "dur": 33.490, + "args": { + "External id": 2946160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017214486.629, "dur": 20.808, + "args": { + "External id": 2946161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017214513.778, "dur": 33.315, + "args": { + "External id": 2946162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017214565.191, "dur": 38.046, + "args": { + "External id": 2946163,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017214628.103, "dur": 28.535, + "args": { + "External id": 2946164,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017214671.224, "dur": 18.348, + "args": { + "External id": 2946165,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017214705.175, "dur": 15.025, + "args": { + "External id": 2946166,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017214731.160, "dur": 15.961, + "args": { + "External id": 2946167,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214818.181, "dur": 15.468, + "args": { + "External id": 2946168,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214821.075, "dur": 11.776, + "args": { + "External id": 2946169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214824.698, "dur": 7.241, + "args": { + "External id": 2946170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214826.196, "dur": 5.606, + "args": { + "External id": 2946171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214840.273, "dur": 5.313, + "args": { + "External id": 2946172,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214841.744, "dur": 3.393, + "args": { + "External id": 2946173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214843.120, "dur": 1.421, + "args": { + "External id": 2946174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214843.616, "dur": 0.854, + "args": { + "External id": 2946175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214848.809, "dur": 4.533, + "args": { + "External id": 2946176,"Record function id": 0, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214850.317, "dur": 2.624, + "args": { + "External id": 2946177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214850.979, "dur": 1.272, + "args": { + "External id": 2946178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214851.428, "dur": 0.736, + "args": { + "External id": 2946179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214856.465, "dur": 4.721, + "args": { + "External id": 2946180,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214857.977, "dur": 2.815, + "args": { + "External id": 2946181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214858.850, "dur": 1.256, + "args": { + "External id": 2946182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214859.155, "dur": 0.876, + "args": { + "External id": 2946183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214864.259, "dur": 4.180, + "args": { + "External id": 2946184,"Record function id": 0, "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214865.482, "dur": 2.567, + "args": { + "External id": 2946185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214866.241, "dur": 1.264, + "args": { + "External id": 2946186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214866.742, "dur": 0.664, + "args": { + "External id": 2946187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214871.499, "dur": 4.299, + "args": { + "External id": 2946188,"Record function id": 0, "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214872.922, "dur": 2.463, + "args": { + "External id": 2946189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214873.587, "dur": 1.216, + "args": { + "External id": 2946190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214873.939, "dur": 0.790, + "args": { + "External id": 2946191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214878.986, "dur": 4.888, + "args": { + "External id": 2946192,"Record function id": 0, "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214880.138, "dur": 3.323, + "args": { + "External id": 2946193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214880.644, "dur": 2.337, + "args": { + "External id": 2946194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214882.229, "dur": 0.689, + "args": { + "External id": 2946195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214886.931, "dur": 9.330, + "args": { + "External id": 2946196,"Record function id": 0, "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214888.248, "dur": 7.604, + "args": { + "External id": 2946197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214892.479, "dur": 2.849, + "args": { + "External id": 2946198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214892.797, "dur": 2.459, + "args": { + "External id": 2946199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214899.296, "dur": 4.084, + "args": { + "External id": 2946200,"Record function id": 0, "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017214900.529, "dur": 2.413, + "args": { + "External id": 2946201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214901.071, "dur": 1.331, + "args": { + "External id": 2946202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017214901.365, "dur": 0.975, + "args": { + "External id": 2946203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017214907.742, "dur": 36689.849, + "args": { + "External id": 2946204,"Record function id": 0, "Sequence number": 29294567, "Fwd thread id": 1, "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017214908.968, "dur": 36680.331, + "args": { + "External id": 2946205,"Sequence number": 29294567, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5276 + } + }, + { + "ph": "f", "id": 221, "pid": 1336755, "tid": 1381158, "ts": 1555017214908.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 1336755, "tid": 1381158, + "ts": 1555017214933.944, "dur": 36.927, + "args": { + "External id": 2946206,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 1336755, "tid": 1381158, + "ts": 1555017214978.209, "dur": 107.073, + "args": { + "External id": 2946207,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 1336755, "tid": 1381158, + "ts": 1555017215092.915, "dur": 36489.224, + "args": { + "External id": 2946208,"Record function id": 0, "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017215192.413, "dur": 7.544, + "args": { + "External id": 2946209,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017215210.507, "dur": 4.994, + "args": { + "External id": 2946210,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017215229.430, "dur": 35545.747, + "args": { + "External id": 2946211,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017215242.802, "dur": 35524.264, + "args": { + "External id": 2946212,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017215284.956, "dur": 13.519, + "args": { + "External id": 2946213,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017215304.465, "dur": 35423.189, + "args": { + "External id": 2946214,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017215306.896, "dur": 35420.107, + "args": { + "External id": 2946215,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017215310.481, "dur": 5.161, + "args": { + "External id": 2946216,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017215317.442, "dur": 35406.073, + "args": { + "External id": 2946217,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017250857.994, "dur": 8.193, + "args": { + "External id": 2946218,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017250860.535, "dur": 5.300, + "args": { + "External id": 2946219,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017250893.592, "dur": 405.678, + "args": { + "External id": 2946220,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017250920.703, "dur": 373.604, + "args": { + "External id": 2946221,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5292, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017250932.123, "dur": 356.064, + "args": { + "External id": 2946222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017251320.867, "dur": 2.108, + "args": { + "External id": 2946223,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5294, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251381.597, "dur": 5.940, + "args": { + "External id": 2946224,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251429.800, "dur": 1.466, + "args": { + "External id": 2946225,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251446.600, "dur": 2.928, + "args": { + "External id": 2946226,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251461.916, "dur": 1.157, + "args": { + "External id": 2946227,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251474.442, "dur": 0.883, + "args": { + "External id": 2946228,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251484.359, "dur": 1.228, + "args": { + "External id": 2946229,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251496.790, "dur": 3.284, + "args": { + "External id": 2946230,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251514.345, "dur": 1.298, + "args": { + "External id": 2946231,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251525.310, "dur": 0.936, + "args": { + "External id": 2946232,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017251610.734, "dur": 2775.819, + "args": { + "External id": 2946233,"Record function id": 0, "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 1336755, "tid": 1381158, + "ts": 1555017251629.631, "dur": 1027.623, + "args": { + "External id": 2946234,"Record function id": 0, "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 1336755, "tid": 1381158, + "ts": 1555017251643.279, "dur": 307.985, + "args": { + "External id": 2946235,"Record function id": 0, "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251726.982, "dur": 3.501, + "args": { + "External id": 2946236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251733.392, "dur": 1.507, + "args": { + "External id": 2946237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251738.557, "dur": 1.022, + "args": { + "External id": 2946238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251741.121, "dur": 2.665, + "args": { + "External id": 2946239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251745.266, "dur": 0.836, + "args": { + "External id": 2946240,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251747.389, "dur": 1.190, + "args": { + "External id": 2946241,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251751.657, "dur": 1.283, + "args": { + "External id": 2946242,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251755.339, "dur": 1.049, + "args": { + "External id": 2946243,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251758.129, "dur": 1.152, + "args": { + "External id": 2946244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017251760.841, "dur": 0.939, + "args": { + "External id": 2946245,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017251780.525, "dur": 145.251, + "args": { + "External id": 2946246,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017251796.079, "dur": 125.688, + "args": { + "External id": 2946247,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017251813.221, "dur": 12.138, + "args": { + "External id": 2946248,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017251828.172, "dur": 65.844, + "args": { + "External id": 2946249,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017251831.454, "dur": 62.226, + "args": { + "External id": 2946250,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017251835.071, "dur": 8.033, + "args": { + "External id": 2946251,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017251844.680, "dur": 48.485, + "args": { + "External id": 2946252,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 1336755, "tid": 1381158, + "ts": 1555017252089.871, "dur": 559.460, + "args": { + "External id": 2946253,"Record function id": 0, "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 1336755, "tid": 1381158, + "ts": 1555017252105.992, "dur": 532.033, + "args": { + "External id": 2946254,"Record function id": 0, "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017252186.361, "dur": 5.963, + "args": { + "External id": 2946255,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017252214.614, "dur": 28.703, + "args": { + "External id": 2946256,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252220.511, "dur": 1.726, + "args": { + "External id": 2946257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252224.240, "dur": 0.684, + "args": { + "External id": 2946258,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252226.676, "dur": 0.352, + "args": { + "External id": 2946259,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252228.447, "dur": 0.582, + "args": { + "External id": 2946260,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252230.531, "dur": 0.421, + "args": { + "External id": 2946261,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252232.885, "dur": 0.348, + "args": { + "External id": 2946262,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252234.325, "dur": 2.588, + "args": { + "External id": 2946263,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252238.552, "dur": 0.342, + "args": { + "External id": 2946264,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252240.262, "dur": 0.276, + "args": { + "External id": 2946265,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017252254.076, "dur": 40.497, + "args": { + "External id": 2946266,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017252324.865, "dur": 104.316, + "args": { + "External id": 2946267,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017252338.220, "dur": 3.208, + "args": { + "External id": 2946268,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017252346.800, "dur": 9.381, + "args": { + "External id": 2946269,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017252350.858, "dur": 4.923, + "args": { + "External id": 2946270,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252354.060, "dur": 0.530, + "args": { + "External id": 2946271,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017252363.282, "dur": 28.724, + "args": { + "External id": 2946272,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252365.544, "dur": 0.380, + "args": { + "External id": 2946273,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252367.850, "dur": 0.516, + "args": { + "External id": 2946274,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252369.592, "dur": 0.359, + "args": { + "External id": 2946275,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252371.685, "dur": 2.192, + "args": { + "External id": 2946276,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252375.099, "dur": 0.762, + "args": { + "External id": 2946277,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252377.479, "dur": 0.446, + "args": { + "External id": 2946278,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252381.624, "dur": 0.359, + "args": { + "External id": 2946279,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252383.492, "dur": 0.361, + "args": { + "External id": 2946280,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017252385.229, "dur": 0.420, + "args": { + "External id": 2946281,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017252402.048, "dur": 20.004, + "args": { + "External id": 2946282,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017252472.659, "dur": 104.287, + "args": { + "External id": 2946283,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017252495.531, "dur": 78.369, + "args": { + "External id": 2946284,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5355, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017252505.186, "dur": 64.690, + "args": { + "External id": 2946285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017252591.704, "dur": 1.718, + "args": { + "External id": 2946286,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5357, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017252664.823, "dur": 1700.843, + "args": { + "External id": 2946287,"Sequence number": 29294566, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5358 + } + }, + { + "ph": "f", "id": 222, "pid": 1336755, "tid": 1381158, "ts": 1555017252664.823, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017252775.605, "dur": 100.550, + "args": { + "External id": 2946288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017252915.940, "dur": 43.737, + "args": { + "External id": 2946289,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017252975.002, "dur": 99.002, + "args": { + "External id": 2946290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253087.476, "dur": 34.106, + "args": { + "External id": 2946291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253128.119, "dur": 62.818, + "args": { + "External id": 2946292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253203.013, "dur": 30.276, + "args": { + "External id": 2946293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253240.894, "dur": 41.683, + "args": { + "External id": 2946294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017253310.295, "dur": 27.541, + "args": { + "External id": 2946295,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017253356.814, "dur": 32.146, + "args": { + "External id": 2946296,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017253409.324, "dur": 20.552, + "args": { + "External id": 2946297,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017253441.527, "dur": 14.793, + "args": { + "External id": 2946298,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253466.617, "dur": 30.298, + "args": { + "External id": 2946299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253499.993, "dur": 33.818, + "args": { + "External id": 2946300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017253561.151, "dur": 166.773, + "args": { + "External id": 2946301,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017253637.590, "dur": 5.980, + "args": { + "External id": 2946302,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017253645.600, "dur": 2.410, + "args": { + "External id": 2946303,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017253759.638, "dur": 24.324, + "args": { + "External id": 2946304,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017253794.724, "dur": 16.782, + "args": { + "External id": 2946305,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253820.571, "dur": 36.028, + "args": { + "External id": 2946306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253863.172, "dur": 38.647, + "args": { + "External id": 2946307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253908.548, "dur": 21.487, + "args": { + "External id": 2946308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253935.506, "dur": 33.936, + "args": { + "External id": 2946309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017253977.657, "dur": 79.422, + "args": { + "External id": 2946310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017254072.440, "dur": 49.976, + "args": { + "External id": 2946311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017254162.556, "dur": 31.815, + "args": { + "External id": 2946312,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017254213.009, "dur": 28.120, + "args": { + "External id": 2946313,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017254255.887, "dur": 18.757, + "args": { + "External id": 2946314,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017254289.392, "dur": 19.052, + "args": { + "External id": 2946315,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017254319.196, "dur": 16.656, + "args": { + "External id": 2946316,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254407.872, "dur": 17.798, + "args": { + "External id": 2946317,"Record function id": 0, "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254414.029, "dur": 10.761, + "args": { + "External id": 2946318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254417.724, "dur": 6.233, + "args": { + "External id": 2946319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254420.095, "dur": 3.752, + "args": { + "External id": 2946320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254429.733, "dur": 7.579, + "args": { + "External id": 2946321,"Record function id": 0, "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254431.212, "dur": 5.674, + "args": { + "External id": 2946322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254432.286, "dur": 3.914, + "args": { + "External id": 2946323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254433.076, "dur": 3.023, + "args": { + "External id": 2946324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254440.604, "dur": 4.919, + "args": { + "External id": 2946325,"Record function id": 0, "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254441.933, "dur": 3.165, + "args": { + "External id": 2946326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254442.854, "dur": 1.846, + "args": { + "External id": 2946327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254443.307, "dur": 1.260, + "args": { + "External id": 2946328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254448.669, "dur": 4.614, + "args": { + "External id": 2946329,"Record function id": 0, "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254450.301, "dur": 2.505, + "args": { + "External id": 2946330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254451.018, "dur": 1.244, + "args": { + "External id": 2946331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254451.496, "dur": 0.693, + "args": { + "External id": 2946332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254456.452, "dur": 4.022, + "args": { + "External id": 2946333,"Record function id": 0, "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254457.757, "dur": 2.323, + "args": { + "External id": 2946334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254458.468, "dur": 1.235, + "args": { + "External id": 2946335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254458.923, "dur": 0.706, + "args": { + "External id": 2946336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254463.588, "dur": 7.920, + "args": { + "External id": 2946337,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254464.895, "dur": 6.166, + "args": { + "External id": 2946338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254465.962, "dur": 4.546, + "args": { + "External id": 2946339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254466.474, "dur": 3.842, + "args": { + "External id": 2946340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254474.740, "dur": 3.961, + "args": { + "External id": 2946341,"Record function id": 0, "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254475.831, "dur": 2.455, + "args": { + "External id": 2946342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254476.341, "dur": 1.266, + "args": { + "External id": 2946343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254476.782, "dur": 0.701, + "args": { + "External id": 2946344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254481.743, "dur": 3.837, + "args": { + "External id": 2946345,"Record function id": 0, "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254482.739, "dur": 2.319, + "args": { + "External id": 2946346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254483.680, "dur": 0.985, + "args": { + "External id": 2946347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254483.965, "dur": 0.574, + "args": { + "External id": 2946348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254488.525, "dur": 5.588, + "args": { + "External id": 2946349,"Record function id": 0, "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017254489.507, "dur": 4.166, + "args": { + "External id": 2946350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254489.987, "dur": 3.277, + "args": { + "External id": 2946351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017254490.552, "dur": 2.570, + "args": { + "External id": 2946352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017254497.600, "dur": 37005.574, + "args": { + "External id": 2946353,"Record function id": 0, "Sequence number": 29294565, "Fwd thread id": 1, "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017254498.880, "dur": 36995.599, + "args": { + "External id": 2946354,"Sequence number": 29294565, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5425 + } + }, + { + "ph": "f", "id": 223, "pid": 1336755, "tid": 1381158, "ts": 1555017254498.880, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 1336755, "tid": 1381158, + "ts": 1555017254528.431, "dur": 36.487, + "args": { + "External id": 2946355,"Record function id": 0, "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 1336755, "tid": 1381158, + "ts": 1555017254572.308, "dur": 68.510, + "args": { + "External id": 2946356,"Record function id": 0, "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 1336755, "tid": 1381158, + "ts": 1555017254646.399, "dur": 36840.476, + "args": { + "External id": 2946357,"Record function id": 0, "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017254729.512, "dur": 6.107, + "args": { + "External id": 2946358,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017254744.189, "dur": 4.674, + "args": { + "External id": 2946359,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017254762.657, "dur": 35877.060, + "args": { + "External id": 2946360,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017254774.903, "dur": 35856.213, + "args": { + "External id": 2946361,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017254822.377, "dur": 13.005, + "args": { + "External id": 2946362,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017254841.245, "dur": 35750.185, + "args": { + "External id": 2946363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017254843.521, "dur": 35747.119, + "args": { + "External id": 2946364,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017254847.389, "dur": 5.194, + "args": { + "External id": 2946365,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017254855.316, "dur": 35731.680, + "args": { + "External id": 2946366,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017290727.313, "dur": 10.051, + "args": { + "External id": 2946367,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017290731.410, "dur": 5.634, + "args": { + "External id": 2946368,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017290764.931, "dur": 446.249, + "args": { + "External id": 2946369,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017290794.397, "dur": 411.443, + "args": { + "External id": 2946370,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5441, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017290804.947, "dur": 394.835, + "args": { + "External id": 2946371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017291230.640, "dur": 2.389, + "args": { + "External id": 2946372,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5443, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291291.695, "dur": 6.344, + "args": { + "External id": 2946373,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291338.456, "dur": 1.526, + "args": { + "External id": 2946374,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291357.270, "dur": 1.191, + "args": { + "External id": 2946375,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291370.121, "dur": 1.299, + "args": { + "External id": 2946376,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291380.823, "dur": 1.124, + "args": { + "External id": 2946377,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291391.854, "dur": 1.047, + "args": { + "External id": 2946378,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291404.818, "dur": 1.040, + "args": { + "External id": 2946379,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291416.244, "dur": 1.283, + "args": { + "External id": 2946380,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291427.305, "dur": 1.025, + "args": { + "External id": 2946381,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017291516.244, "dur": 2759.074, + "args": { + "External id": 2946382,"Record function id": 0, "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 1336755, "tid": 1381158, + "ts": 1555017291536.023, "dur": 1019.637, + "args": { + "External id": 2946383,"Record function id": 0, "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 1336755, "tid": 1381158, + "ts": 1555017291550.946, "dur": 305.229, + "args": { + "External id": 2946384,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291639.531, "dur": 3.685, + "args": { + "External id": 2946385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291646.318, "dur": 1.076, + "args": { + "External id": 2946386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291648.966, "dur": 1.071, + "args": { + "External id": 2946387,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291651.489, "dur": 2.661, + "args": { + "External id": 2946388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291655.414, "dur": 0.998, + "args": { + "External id": 2946389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291657.674, "dur": 0.844, + "args": { + "External id": 2946390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291660.227, "dur": 0.896, + "args": { + "External id": 2946391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291664.016, "dur": 0.959, + "args": { + "External id": 2946392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291666.600, "dur": 1.078, + "args": { + "External id": 2946393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017291669.489, "dur": 0.926, + "args": { + "External id": 2946394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017291686.749, "dur": 143.785, + "args": { + "External id": 2946395,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017291702.340, "dur": 124.066, + "args": { + "External id": 2946396,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017291717.695, "dur": 13.224, + "args": { + "External id": 2946397,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017291733.749, "dur": 65.930, + "args": { + "External id": 2946398,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017291737.711, "dur": 61.597, + "args": { + "External id": 2946399,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017291741.239, "dur": 7.198, + "args": { + "External id": 2946400,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017291750.291, "dur": 48.406, + "args": { + "External id": 2946401,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 1336755, "tid": 1381158, + "ts": 1555017291955.903, "dur": 592.649, + "args": { + "External id": 2946402,"Record function id": 0, "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 1336755, "tid": 1381158, + "ts": 1555017291972.244, "dur": 564.421, + "args": { + "External id": 2946403,"Record function id": 0, "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017292082.667, "dur": 6.101, + "args": { + "External id": 2946404,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017292103.535, "dur": 28.116, + "args": { + "External id": 2946405,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292108.269, "dur": 1.812, + "args": { + "External id": 2946406,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292112.196, "dur": 0.681, + "args": { + "External id": 2946407,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292114.722, "dur": 0.532, + "args": { + "External id": 2946408,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292116.433, "dur": 0.645, + "args": { + "External id": 2946409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292118.668, "dur": 0.563, + "args": { + "External id": 2946410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292120.926, "dur": 0.423, + "args": { + "External id": 2946411,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292122.518, "dur": 2.244, + "args": { + "External id": 2946412,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292126.606, "dur": 0.358, + "args": { + "External id": 2946413,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292128.433, "dur": 0.329, + "args": { + "External id": 2946414,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017292156.316, "dur": 36.260, + "args": { + "External id": 2946415,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1381158, + "ts": 1555017292224.676, "dur": 101.890, + "args": { + "External id": 2946416,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017292234.730, "dur": 4.132, + "args": { + "External id": 2946417,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1381158, + "ts": 1555017292244.201, "dur": 10.198, + "args": { + "External id": 2946418,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1381158, + "ts": 1555017292248.461, "dur": 5.545, + "args": { + "External id": 2946419,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292251.883, "dur": 0.665, + "args": { + "External id": 2946420,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1381158, + "ts": 1555017292261.174, "dur": 27.711, + "args": { + "External id": 2946421,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292263.050, "dur": 0.924, + "args": { + "External id": 2946422,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292266.114, "dur": 0.751, + "args": { + "External id": 2946423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292268.509, "dur": 0.284, + "args": { + "External id": 2946424,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292270.099, "dur": 2.258, + "args": { + "External id": 2946425,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292274.258, "dur": 0.551, + "args": { + "External id": 2946426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292275.825, "dur": 0.379, + "args": { + "External id": 2946427,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292278.017, "dur": 0.415, + "args": { + "External id": 2946428,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292279.906, "dur": 0.606, + "args": { + "External id": 2946429,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017292281.580, "dur": 0.341, + "args": { + "External id": 2946430,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017292299.931, "dur": 19.581, + "args": { + "External id": 2946431,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017292369.511, "dur": 105.952, + "args": { + "External id": 2946432,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017292391.336, "dur": 81.043, + "args": { + "External id": 2946433,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5504, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017292400.931, "dur": 66.790, + "args": { + "External id": 2946434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017292488.779, "dur": 2.003, + "args": { + "External id": 2946435,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5506, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017292562.787, "dur": 1690.528, + "args": { + "External id": 2946436,"Sequence number": 29294564, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5507 + } + }, + { + "ph": "f", "id": 224, "pid": 1336755, "tid": 1381158, "ts": 1555017292562.787, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017292669.470, "dur": 100.400, + "args": { + "External id": 2946437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017292807.888, "dur": 40.289, + "args": { + "External id": 2946438,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017292864.558, "dur": 48.964, + "args": { + "External id": 2946439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017292922.807, "dur": 31.071, + "args": { + "External id": 2946440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017292960.502, "dur": 81.392, + "args": { + "External id": 2946441,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293055.668, "dur": 32.952, + "args": { + "External id": 2946442,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293096.514, "dur": 43.382, + "args": { + "External id": 2946443,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017293181.266, "dur": 30.218, + "args": { + "External id": 2946444,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017293230.664, "dur": 31.737, + "args": { + "External id": 2946445,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017293283.836, "dur": 20.808, + "args": { + "External id": 2946446,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017293317.939, "dur": 15.883, + "args": { + "External id": 2946447,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293344.590, "dur": 35.049, + "args": { + "External id": 2946448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293382.681, "dur": 37.658, + "args": { + "External id": 2946449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017293451.737, "dur": 173.181, + "args": { + "External id": 2946450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017293529.165, "dur": 6.123, + "args": { + "External id": 2946451,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017293537.359, "dur": 2.115, + "args": { + "External id": 2946452,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017293657.825, "dur": 25.805, + "args": { + "External id": 2946453,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017293694.395, "dur": 16.440, + "args": { + "External id": 2946454,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293719.272, "dur": 37.384, + "args": { + "External id": 2946455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293764.785, "dur": 38.255, + "args": { + "External id": 2946456,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293809.744, "dur": 24.972, + "args": { + "External id": 2946457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293840.664, "dur": 30.041, + "args": { + "External id": 2946458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293880.092, "dur": 21.505, + "args": { + "External id": 2946459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017293911.410, "dur": 33.918, + "args": { + "External id": 2946460,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017293961.345, "dur": 68.808, + "args": { + "External id": 2946461,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017294068.673, "dur": 33.775, + "args": { + "External id": 2946462,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017294118.847, "dur": 21.723, + "args": { + "External id": 2946463,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017294175.816, "dur": 18.378, + "args": { + "External id": 2946464,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017294207.912, "dur": 16.690, + "args": { + "External id": 2946465,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294296.353, "dur": 15.746, + "args": { + "External id": 2946466,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294300.024, "dur": 11.232, + "args": { + "External id": 2946467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294304.012, "dur": 6.386, + "args": { + "External id": 2946468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294306.152, "dur": 4.117, + "args": { + "External id": 2946469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294316.306, "dur": 5.385, + "args": { + "External id": 2946470,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294317.461, "dur": 3.809, + "args": { + "External id": 2946471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294318.435, "dur": 2.361, + "args": { + "External id": 2946472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294319.716, "dur": 0.969, + "args": { + "External id": 2946473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294325.062, "dur": 5.458, + "args": { + "External id": 2946474,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294326.694, "dur": 3.417, + "args": { + "External id": 2946475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294327.695, "dur": 1.981, + "args": { + "External id": 2946476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294328.447, "dur": 1.152, + "args": { + "External id": 2946477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294333.951, "dur": 5.575, + "args": { + "External id": 2946478,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294335.769, "dur": 3.320, + "args": { + "External id": 2946479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294336.682, "dur": 1.870, + "args": { + "External id": 2946480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294337.361, "dur": 1.099, + "args": { + "External id": 2946481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294342.561, "dur": 7.155, + "args": { + "External id": 2946482,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294344.157, "dur": 5.141, + "args": { + "External id": 2946483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294344.888, "dur": 3.940, + "args": { + "External id": 2946484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294345.277, "dur": 3.479, + "args": { + "External id": 2946485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294352.892, "dur": 4.846, + "args": { + "External id": 2946486,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294354.209, "dur": 3.089, + "args": { + "External id": 2946487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294354.916, "dur": 1.846, + "args": { + "External id": 2946488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294355.370, "dur": 1.256, + "args": { + "External id": 2946489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294361.016, "dur": 4.432, + "args": { + "External id": 2946490,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294362.447, "dur": 2.576, + "args": { + "External id": 2946491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294363.220, "dur": 1.271, + "args": { + "External id": 2946492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294363.505, "dur": 0.881, + "args": { + "External id": 2946493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294368.535, "dur": 4.393, + "args": { + "External id": 2946494,"Record function id": 0, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294369.628, "dur": 2.869, + "args": { + "External id": 2946495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294370.337, "dur": 1.623, + "args": { + "External id": 2946496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294370.875, "dur": 0.934, + "args": { + "External id": 2946497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294376.072, "dur": 4.992, + "args": { + "External id": 2946498,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017294377.450, "dur": 3.172, + "args": { + "External id": 2946499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294378.400, "dur": 1.795, + "args": { + "External id": 2946500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017294379.341, "dur": 0.708, + "args": { + "External id": 2946501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017294385.376, "dur": 36977.510, + "args": { + "External id": 2946502,"Record function id": 0, "Sequence number": 29294563, "Fwd thread id": 1, "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017294387.022, "dur": 36967.744, + "args": { + "External id": 2946503,"Sequence number": 29294563, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5574 + } + }, + { + "ph": "f", "id": 225, "pid": 1336755, "tid": 1381158, "ts": 1555017294387.022, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 1336755, "tid": 1381158, + "ts": 1555017294416.478, "dur": 36.982, + "args": { + "External id": 2946504,"Record function id": 0, "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 1336755, "tid": 1381158, + "ts": 1555017294461.035, "dur": 72.379, + "args": { + "External id": 2946505,"Record function id": 0, "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 1336755, "tid": 1381158, + "ts": 1555017294539.147, "dur": 36808.167, + "args": { + "External id": 2946506,"Record function id": 0, "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017294623.687, "dur": 6.365, + "args": { + "External id": 2946507,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017294638.569, "dur": 4.731, + "args": { + "External id": 2946508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017294656.100, "dur": 35753.248, + "args": { + "External id": 2946509,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017294668.676, "dur": 35731.963, + "args": { + "External id": 2946510,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017294712.693, "dur": 12.831, + "args": { + "External id": 2946511,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017294731.321, "dur": 35630.706, + "args": { + "External id": 2946512,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017294733.985, "dur": 35627.415, + "args": { + "External id": 2946513,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017294737.527, "dur": 4.551, + "args": { + "External id": 2946514,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017294744.788, "dur": 35613.222, + "args": { + "External id": 2946515,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017330493.127, "dur": 9.783, + "args": { + "External id": 2946516,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017330495.788, "dur": 6.698, + "args": { + "External id": 2946517,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017330532.278, "dur": 517.196, + "args": { + "External id": 2946518,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017330559.430, "dur": 484.942, + "args": { + "External id": 2946519,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5590, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017330572.273, "dur": 466.395, + "args": { + "External id": 2946520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017331070.283, "dur": 2.263, + "args": { + "External id": 2946521,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5592, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331131.876, "dur": 6.432, + "args": { + "External id": 2946522,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331195.453, "dur": 3.926, + "args": { + "External id": 2946523,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331215.157, "dur": 1.467, + "args": { + "External id": 2946524,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331228.504, "dur": 1.420, + "args": { + "External id": 2946525,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331241.358, "dur": 0.991, + "args": { + "External id": 2946526,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331251.957, "dur": 2.665, + "args": { + "External id": 2946527,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331265.129, "dur": 1.450, + "args": { + "External id": 2946528,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331276.901, "dur": 1.844, + "args": { + "External id": 2946529,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331289.578, "dur": 0.693, + "args": { + "External id": 2946530,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017331377.681, "dur": 2141.128, + "args": { + "External id": 2946531,"Record function id": 0, "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 1336755, "tid": 1381158, + "ts": 1555017331395.894, "dur": 438.526, + "args": { + "External id": 2946532,"Record function id": 0, "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 1336755, "tid": 1381158, + "ts": 1555017331409.490, "dur": 316.884, + "args": { + "External id": 2946533,"Record function id": 0, "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331501.074, "dur": 3.914, + "args": { + "External id": 2946534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331508.372, "dur": 1.468, + "args": { + "External id": 2946535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331511.782, "dur": 2.924, + "args": { + "External id": 2946536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331516.487, "dur": 1.058, + "args": { + "External id": 2946537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331518.902, "dur": 1.230, + "args": { + "External id": 2946538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331521.682, "dur": 0.965, + "args": { + "External id": 2946539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331524.211, "dur": 1.715, + "args": { + "External id": 2946540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331527.519, "dur": 1.756, + "args": { + "External id": 2946541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331532.012, "dur": 1.250, + "args": { + "External id": 2946542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017331534.690, "dur": 1.334, + "args": { + "External id": 2946543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017331552.839, "dur": 143.783, + "args": { + "External id": 2946544,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017331569.854, "dur": 122.902, + "args": { + "External id": 2946545,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017331584.810, "dur": 12.625, + "args": { + "External id": 2946546,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017331600.410, "dur": 65.204, + "args": { + "External id": 2946547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017331603.078, "dur": 62.188, + "args": { + "External id": 2946548,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017331606.656, "dur": 6.108, + "args": { + "External id": 2946549,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017331614.519, "dur": 49.885, + "args": { + "External id": 2946550,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017331840.847, "dur": 1653.539, + "args": { + "External id": 2946551,"Sequence number": 29294562, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5622 + } + }, + { + "ph": "f", "id": 226, "pid": 1336755, "tid": 1381158, "ts": 1555017331840.847, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017331944.040, "dur": 144.878, + "args": { + "External id": 2946552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017332130.492, "dur": 57.453, + "args": { + "External id": 2946553,"kernel_hash": "cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn36lfkgqqupbxwn2zb3d7hbx6wasbkfw25e2tiypbtxd7numdjk.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332208.186, "dur": 55.969, + "args": { + "External id": 2946554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332275.508, "dur": 32.976, + "args": { + "External id": 2946555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332315.420, "dur": 46.103, + "args": { + "External id": 2946556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332368.735, "dur": 26.958, + "args": { + "External id": 2946557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332403.738, "dur": 42.506, + "args": { + "External id": 2946558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017332470.177, "dur": 24.011, + "args": { + "External id": 2946559,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017332513.017, "dur": 32.530, + "args": { + "External id": 2946560,"kernel_hash": "cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ft/cftva7q4tthftm2pfffkbnwvi3qf66xwuas7mfdn6kytjjiyl5z2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017332586.161, "dur": 22.148, + "args": { + "External id": 2946561,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017332620.675, "dur": 14.832, + "args": { + "External id": 2946562,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332644.066, "dur": 30.479, + "args": { + "External id": 2946563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017332677.938, "dur": 35.248, + "args": { + "External id": 2946564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336755, "tid": 1381158, + "ts": 1555017332742.318, "dur": 163.205, + "args": { + "External id": 2946565,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017332815.823, "dur": 5.875, + "args": { + "External id": 2946566,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017332823.576, "dur": 2.086, + "args": { + "External id": 2946567,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017332935.316, "dur": 24.987, + "args": { + "External id": 2946568,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017332970.557, "dur": 53.629, + "args": { + "External id": 2946569,"kernel_hash": "c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2k/c2kxhcgwf5peuymkqkwnwh2gib2ndh44brhtyha53rjcduz6hvjt.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017333035.030, "dur": 43.025, + "args": { + "External id": 2946570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017333089.663, "dur": 38.049, + "args": { + "External id": 2946571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017333134.678, "dur": 42.584, + "args": { + "External id": 2946572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017333187.385, "dur": 33.077, + "args": { + "External id": 2946573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017333227.754, "dur": 20.714, + "args": { + "External id": 2946574,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1381158, + "ts": 1555017333257.378, "dur": 30.648, + "args": { + "External id": 2946575,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336755, "tid": 1381158, + "ts": 1555017333305.200, "dur": 23.834, + "args": { + "External id": 2946576,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017333346.401, "dur": 25.476, + "args": { + "External id": 2946577,"kernel_hash": "c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/7h/c7hev5r5fuhwcl5w7lgezzrl3pqoztu6hexnzvintjo3g5dp4qul.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017333387.947, "dur": 17.345, + "args": { + "External id": 2946578,"kernel_hash": "c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/7o/c7ocuz4slsjtuvmqmwy4afzvxcvtxlhfpr7l35az36sf2i4l6skd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336755, "tid": 1381158, + "ts": 1555017333418.385, "dur": 18.171, + "args": { + "External id": 2946579,"kernel_hash": "cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/q5/cq5g2qwitmglmmtdcjxgfatshc3yhpajgog4h5a66t4v3wsxsiak.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336755, "tid": 1381158, + "ts": 1555017333446.966, "dur": 15.918, + "args": { + "External id": 2946580,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333540.759, "dur": 17.531, + "args": { + "External id": 2946581,"Record function id": 0, "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333544.457, "dur": 13.011, + "args": { + "External id": 2946582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333548.142, "dur": 8.426, + "args": { + "External id": 2946583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333552.567, "dur": 3.864, + "args": { + "External id": 2946584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333561.971, "dur": 4.740, + "args": { + "External id": 2946585,"Record function id": 0, "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333563.224, "dur": 3.049, + "args": { + "External id": 2946586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333564.025, "dur": 1.792, + "args": { + "External id": 2946587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333564.851, "dur": 0.890, + "args": { + "External id": 2946588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333569.938, "dur": 5.013, + "args": { + "External id": 2946589,"Record function id": 0, "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333571.692, "dur": 2.771, + "args": { + "External id": 2946590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333572.182, "dur": 1.828, + "args": { + "External id": 2946591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333572.694, "dur": 1.218, + "args": { + "External id": 2946592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333578.080, "dur": 4.676, + "args": { + "External id": 2946593,"Record function id": 0, "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333579.648, "dur": 2.706, + "args": { + "External id": 2946594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333580.266, "dur": 1.685, + "args": { + "External id": 2946595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333580.766, "dur": 1.069, + "args": { + "External id": 2946596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333585.873, "dur": 4.776, + "args": { + "External id": 2946597,"Record function id": 0, "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333587.451, "dur": 2.677, + "args": { + "External id": 2946598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333588.281, "dur": 1.451, + "args": { + "External id": 2946599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333588.539, "dur": 1.104, + "args": { + "External id": 2946600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333593.749, "dur": 4.269, + "args": { + "External id": 2946601,"Record function id": 0, "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333595.122, "dur": 2.460, + "args": { + "External id": 2946602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333595.855, "dur": 1.293, + "args": { + "External id": 2946603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333596.486, "dur": 0.543, + "args": { + "External id": 2946604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333601.303, "dur": 9.285, + "args": { + "External id": 2946605,"Record function id": 0, "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333602.683, "dur": 7.458, + "args": { + "External id": 2946606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333603.279, "dur": 3.400, + "args": { + "External id": 2946607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333603.581, "dur": 2.967, + "args": { + "External id": 2946608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333613.856, "dur": 5.468, + "args": { + "External id": 2946609,"Record function id": 0, "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333615.721, "dur": 3.105, + "args": { + "External id": 2946610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333616.566, "dur": 1.803, + "args": { + "External id": 2946611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333616.839, "dur": 1.436, + "args": { + "External id": 2946612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333622.349, "dur": 4.201, + "args": { + "External id": 2946613,"Record function id": 0, "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017333623.719, "dur": 2.404, + "args": { + "External id": 2946614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333624.582, "dur": 1.150, + "args": { + "External id": 2946615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017333625.003, "dur": 0.634, + "args": { + "External id": 2946616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017333630.743, "dur": 36942.036, + "args": { + "External id": 2946617,"Record function id": 0, "Sequence number": 29294561, "Fwd thread id": 1, "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017333632.119, "dur": 36932.529, + "args": { + "External id": 2946618,"Sequence number": 29294561, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5689 + } + }, + { + "ph": "f", "id": 227, "pid": 1336755, "tid": 1381158, "ts": 1555017333632.119, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 1336755, "tid": 1381158, + "ts": 1555017333658.282, "dur": 38.794, + "args": { + "External id": 2946619,"Record function id": 0, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 1336755, "tid": 1381158, + "ts": 1555017333703.814, "dur": 74.146, + "args": { + "External id": 2946620,"Record function id": 0, "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 1336755, "tid": 1381158, + "ts": 1555017333786.220, "dur": 36771.122, + "args": { + "External id": 2946621,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017333884.278, "dur": 6.372, + "args": { + "External id": 2946622,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017333899.826, "dur": 4.525, + "args": { + "External id": 2946623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017333918.437, "dur": 35827.166, + "args": { + "External id": 2946624,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017333931.610, "dur": 35805.404, + "args": { + "External id": 2946625,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017333979.422, "dur": 54.648, + "args": { + "External id": 2946626,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017334042.526, "dur": 35651.402, + "args": { + "External id": 2946627,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017334044.816, "dur": 35648.456, + "args": { + "External id": 2946628,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017334052.461, "dur": 5.766, + "args": { + "External id": 2946629,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017334060.028, "dur": 35630.149, + "args": { + "External id": 2946630,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017369828.907, "dur": 8.620, + "args": { + "External id": 2946631,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017369831.438, "dur": 5.733, + "args": { + "External id": 2946632,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017369866.926, "dur": 403.250, + "args": { + "External id": 2946633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017369894.095, "dur": 370.855, + "args": { + "External id": 2946634,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5705, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017369905.233, "dur": 354.081, + "args": { + "External id": 2946635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017370290.341, "dur": 2.518, + "args": { + "External id": 2946636,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5707, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370354.849, "dur": 6.638, + "args": { + "External id": 2946637,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370403.843, "dur": 1.551, + "args": { + "External id": 2946638,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370420.937, "dur": 3.903, + "args": { + "External id": 2946639,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370437.733, "dur": 0.992, + "args": { + "External id": 2946640,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370451.076, "dur": 1.125, + "args": { + "External id": 2946641,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370462.618, "dur": 1.076, + "args": { + "External id": 2946642,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370474.621, "dur": 3.099, + "args": { + "External id": 2946643,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370489.434, "dur": 0.956, + "args": { + "External id": 2946644,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017370500.399, "dur": 1.052, + "args": { + "External id": 2946645,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017370586.064, "dur": 277.102, + "args": { + "External id": 2946646,"Record function id": 0, "Sequence number": 29294560, "Fwd thread id": 1, "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336755, "tid": 1381158, + "ts": 1555017370588.861, "dur": 265.600, + "args": { + "External id": 2946647,"Sequence number": 29294560, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5718 + } + }, + { + "ph": "f", "id": 228, "pid": 1336755, "tid": 1381158, "ts": 1555017370588.861, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 1336755, "tid": 1381158, + "ts": 1555017370706.805, "dur": 45.910, + "args": { + "External id": 2946648,"kernel_hash": "cmoh6wfeedsm7av7lakucppei35p6y74vuqdepqnvao7auiukn35", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/mo/cmoh6wfeedsm7av7lakucppei35p6y74vuqdepqnvao7auiukn35.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 1336755, "tid": 1381158, + "ts": 1555017370768.666, "dur": 31.006, + "args": { + "External id": 2946649,"kernel_hash": "csul2yqijdgvgyk5nmoojt6ynwrtqvvhhsjr4ep2yevpyhyafiih", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/su/csul2yqijdgvgyk5nmoojt6ynwrtqvvhhsjr4ep2yevpyhyafiih.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096], [16, 4096, 2048], [32000, 2048], []], "Ev Idx": 5720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 1336755, "tid": 1381158, + "ts": 1555017370816.512, "dur": 23.543, + "args": { + "External id": 2946650,"kernel_hash": "c3parwwogviovkydlbvnscusjw2v6hbjksr25p7owhrnzjolvccu", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/3p/c3parwwogviovkydlbvnscusjw2v6hbjksr25p7owhrnzjolvccu.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017370872.720, "dur": 14.101, + "args": { + "External id": 2946651,"Record function id": 0, "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336755, "tid": 1381158, + "ts": 1555017370875.630, "dur": 10.336, + "args": { + "External id": 2946652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017370879.018, "dur": 6.131, + "args": { + "External id": 2946653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1381158, + "ts": 1555017370881.119, "dur": 3.887, + "args": { + "External id": 2946654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 1336755, "tid": 1381158, + "ts": 1555017370908.105, "dur": 11246.181, + "args": { + "External id": 2946655,"Record function id": 0, "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 1336755, "tid": 1381158, + "ts": 1555017370926.818, "dur": 40.463, + "args": { + "External id": 2946656,"Record function id": 0, "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 1336755, "tid": 1381158, + "ts": 1555017370972.874, "dur": 346.724, + "args": { + "External id": 2946657,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 1336755, "tid": 1381158, + "ts": 1555017371328.144, "dur": 10506.706, + "args": { + "External id": 2946658,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017371453.226, "dur": 6.673, + "args": { + "External id": 2946659,"Record function id": 0, "Concrete Inputs": ["[336611328]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1381158, + "ts": 1555017371470.778, "dur": 4.899, + "args": { + "External id": 2946660,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[336611328], []], "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017371496.063, "dur": 8993.344, + "args": { + "External id": 2946661,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [42076416, 1]], "Input Dims": [[], [], [], [8, 42076416]], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336755, "tid": 1381158, + "ts": 1555017371514.222, "dur": 8962.835, + "args": { + "External id": 2946662,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [42076416, 1]], "Input Dims": [[], [], [], [8, 42076416]], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017371864.908, "dur": 15.896, + "args": { + "External id": 2946663,"Record function id": 0, "Concrete Inputs": ["[82421]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1381158, + "ts": 1555017371972.904, "dur": 8458.392, + "args": { + "External id": 2946664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[82421], [], [], [], [], [], [], []], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1381158, + "ts": 1555017371975.515, "dur": 8454.769, + "args": { + "External id": 2946665,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[82421], [], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017371980.114, "dur": 47.964, + "args": { + "External id": 2946666,"Record function id": 0, "Concrete Inputs": ["[82421]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1381158, + "ts": 1555017372031.877, "dur": 8393.357, + "args": { + "External id": 2946667,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[82421], [82421], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017380627.375, "dur": 10.547, + "args": { + "External id": 2946668,"Record function id": 0, "Concrete Inputs": ["", "[42076416]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[336611328], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1381158, + "ts": 1555017380630.406, "dur": 7.110, + "args": { + "External id": 2946669,"Record function id": 0, "Concrete Inputs": ["[42076416]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336755, "tid": 1381158, + "ts": 1555017380666.809, "dur": 521.620, + "args": { + "External id": 2946670,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[42076416], [336611328], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017380692.375, "dur": 490.891, + "args": { + "External id": 2946671,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 42076416, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[336611328], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5742, "In msg nelems": 336611328 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336755, "tid": 1381158, + "ts": 1555017380707.063, "dur": 470.256, + "args": { + "External id": 2946672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[336611328]], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1381158, + "ts": 1555017381206.849, "dur": 2.384, + "args": { + "External id": 2946673,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5744, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381265.159, "dur": 6.488, + "args": { + "External id": 2946674,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381312.103, "dur": 1.497, + "args": { + "External id": 2946675,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381328.548, "dur": 1.091, + "args": { + "External id": 2946676,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "8192256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381341.877, "dur": 3.105, + "args": { + "External id": 2946677,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "8716544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381356.971, "dur": 1.127, + "args": { + "External id": 2946678,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "9240832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381368.575, "dur": 1.942, + "args": { + "External id": 2946679,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "9765120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381382.272, "dur": 1.279, + "args": { + "External id": 2946680,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "10289408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381394.631, "dur": 3.407, + "args": { + "External id": 2946681,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "10289664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381409.987, "dur": 1.535, + "args": { + "External id": 2946682,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "11731456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381422.035, "dur": 1.358, + "args": { + "External id": 2946683,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "13173248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381433.108, "dur": 1.646, + "args": { + "External id": 2946684,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14615040"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381444.684, "dur": 3.093, + "args": { + "External id": 2946685,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "14615296"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381457.894, "dur": 1.340, + "args": { + "External id": 2946686,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "15139584"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381469.483, "dur": 1.452, + "args": { + "External id": 2946687,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "15663872"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381481.344, "dur": 1.248, + "args": { + "External id": 2946688,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "16188160"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381493.139, "dur": 2.386, + "args": { + "External id": 2946689,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "16712448"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381505.949, "dur": 1.198, + "args": { + "External id": 2946690,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "16712704"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381518.312, "dur": 1.255, + "args": { + "External id": 2946691,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "18154496"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381530.751, "dur": 1.174, + "args": { + "External id": 2946692,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "19596288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381542.456, "dur": 2.524, + "args": { + "External id": 2946693,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21038080"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381556.047, "dur": 1.354, + "args": { + "External id": 2946694,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "21038336"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381567.989, "dur": 1.618, + "args": { + "External id": 2946695,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "21562624"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381580.245, "dur": 1.329, + "args": { + "External id": 2946696,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "22086912"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381592.520, "dur": 3.052, + "args": { + "External id": 2946697,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "22611200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381605.843, "dur": 0.991, + "args": { + "External id": 2946698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "23135488"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381618.565, "dur": 1.263, + "args": { + "External id": 2946699,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "23135744"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381630.151, "dur": 1.332, + "args": { + "External id": 2946700,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "24577536"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381641.093, "dur": 3.310, + "args": { + "External id": 2946701,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "26019328"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381655.244, "dur": 1.169, + "args": { + "External id": 2946702,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "27461120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381667.602, "dur": 1.103, + "args": { + "External id": 2946703,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "27461376"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381679.021, "dur": 1.615, + "args": { + "External id": 2946704,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "27985664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381690.039, "dur": 3.167, + "args": { + "External id": 2946705,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "28509952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381703.806, "dur": 1.395, + "args": { + "External id": 2946706,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "29034240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381716.381, "dur": 1.375, + "args": { + "External id": 2946707,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "29558528"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381727.712, "dur": 1.738, + "args": { + "External id": 2946708,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "29558784"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381740.090, "dur": 2.702, + "args": { + "External id": 2946709,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "31000576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381752.894, "dur": 1.570, + "args": { + "External id": 2946710,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "32442368"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381764.316, "dur": 1.308, + "args": { + "External id": 2946711,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "33884160"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1381158, + "ts": 1555017381776.350, "dur": 1.792, + "args": { + "External id": 2946712,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "33884416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#121855", "pid": 1336755, "tid": 1336755, + "ts": 1555015267691.436, "dur": 2132937.793, + "args": { + "External id": 2935809,"Record function id": 0, "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 1336755, "tid": 1336755, + "ts": 1555015267724.557, "dur": 754.661, + "args": { + "External id": 2935810,"Record function id": 0, "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 1336755, "tid": 1336755, + "ts": 1555015268524.374, "dur": 2198.964, + "args": { + "External id": 2935811,"Record function id": 0, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015269587.580, "dur": 7.117, + "args": { + "External id": 2935812,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 1336755, "tid": 1336755, + "ts": 1555015269615.217, "dur": 6.495, + "args": { + "External id": 2935813,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015270169.592, "dur": 2.502, + "args": { + "External id": 2935814,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 1336755, "tid": 1336755, + "ts": 1555015270182.116, "dur": 2.706, + "args": { + "External id": 2935815,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015270609.483, "dur": 1.863, + "args": { + "External id": 2935816,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 1336755, "tid": 1336755, + "ts": 1555015270615.806, "dur": 2.034, + "args": { + "External id": 2935817,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015271349.740, "dur": 14.755, + "args": { + "External id": 2935818,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015271358.643, "dur": 2.044, + "args": { + "External id": 2935819,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015271366.208, "dur": 3.753, + "args": { + "External id": 2935820,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015271368.115, "dur": 0.858, + "args": { + "External id": 2935821,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015271395.233, "dur": 625.170, + "args": { + "External id": 2935822,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015271402.603, "dur": 616.986, + "args": { + "External id": 2935823,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015271410.852, "dur": 9.855, + "args": { + "External id": 2935824,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015271423.123, "dur": 594.140, + "args": { + "External id": 2935825,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015271432.441, "dur": 0.496, + "args": { + "External id": 2935826,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015271435.874, "dur": 7.620, + "args": { + "External id": 2935827,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 1336755, "tid": 1336755, + "ts": 1555015271439.136, "dur": 4.106, + "args": { + "External id": 2935828,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015271442.299, "dur": 0.582, + "args": { + "External id": 2935829,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015271445.937, "dur": 272.030, + "args": { + "External id": 2935830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015271450.567, "dur": 267.075, + "args": { + "External id": 2935831,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015271452.867, "dur": 14.315, + "args": { + "External id": 2935832,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015271454.803, "dur": 11.895, + "args": { + "External id": 2935833,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015271467.891, "dur": 249.242, + "args": { + "External id": 2935834,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015271720.345, "dur": 258.897, + "args": { + "External id": 2935835,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015272038.349, "dur": 753.528, + "args": { + "External id": 2935836,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015272040.904, "dur": 750.123, + "args": { + "External id": 2935837,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015272049.483, "dur": 8.778, + "args": { + "External id": 2935838,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015272059.367, "dur": 727.593, + "args": { + "External id": 2935839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 1336755, "tid": 1336755, + "ts": 1555015272822.480, "dur": 54.412, + "args": { + "External id": 2935840,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015272828.307, "dur": 5.559, + "args": { + "External id": 2935841,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 1336755, "tid": 1336755, + "ts": 1555015272837.322, "dur": 39.165, + "args": { + "External id": 2935842,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015272842.561, "dur": 6.854, + "args": { + "External id": 2935843,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 1336755, "tid": 1336755, + "ts": 1555015272887.740, "dur": 71.754, + "args": { + "External id": 2935844,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 1336755, "tid": 1336755, + "ts": 1555015272895.091, "dur": 7.276, + "args": { + "External id": 2935845,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015272900.323, "dur": 1.709, + "args": { + "External id": 2935846,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015272903.264, "dur": 4.512, + "args": { + "External id": 2935847,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1336755, + "ts": 1555015272910.253, "dur": 3.809, + "args": { + "External id": 2935848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 1336755, "tid": 1336755, + "ts": 1555015272916.695, "dur": 4.401, + "args": { + "External id": 2935849,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015272920.358, "dur": 0.578, + "args": { + "External id": 2935850,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 1336755, "tid": 1336755, + "ts": 1555015272922.060, "dur": 3.952, + "args": { + "External id": 2935851,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015272925.267, "dur": 0.660, + "args": { + "External id": 2935852,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015272927.913, "dur": 5.644, + "args": { + "External id": 2935853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 1336755, "tid": 1336755, + "ts": 1555015272931.011, "dur": 2.391, + "args": { + "External id": 2935854,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015272932.489, "dur": 0.713, + "args": { + "External id": 2935855,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015272934.691, "dur": 24.172, + "args": { + "External id": 2935856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015272966.976, "dur": 67.951, + "args": { + "External id": 2935857,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015272968.293, "dur": 66.432, + "args": { + "External id": 2935858,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015272973.102, "dur": 3.013, + "args": { + "External id": 2935859,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015272976.932, "dur": 56.860, + "args": { + "External id": 2935860,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015273163.352, "dur": 150.635, + "args": { + "External id": 2935861,"Record function id": 0, "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 1336755, "tid": 1336755, + "ts": 1555015273248.590, "dur": 54.090, + "args": { + "External id": 2935862,"Record function id": 0, "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015273321.279, "dur": 44.553, + "args": { + "External id": 2935863,"Record function id": 0, "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015273374.925, "dur": 8564.345, + "args": { + "External id": 2935864,"Record function id": 0, "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 1336755, "tid": 1336755, + "ts": 1555015273384.962, "dur": 1280.587, + "args": { + "External id": 2935865,"Record function id": 0, "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015273506.807, "dur": 7.909, + "args": { + "External id": 2935866,"Record function id": 0, "Concrete Inputs": ["[42076416]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015273533.560, "dur": 119.003, + "args": { + "External id": 2935867,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[42076416], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273538.440, "dur": 1.789, + "args": { + "External id": 2935868,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273545.800, "dur": 0.224, + "args": { + "External id": 2935869,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273546.604, "dur": 0.449, + "args": { + "External id": 2935870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273547.746, "dur": 1.683, + "args": { + "External id": 2935871,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "8716544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273553.898, "dur": 0.345, + "args": { + "External id": 2935872,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "9240832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273555.293, "dur": 0.425, + "args": { + "External id": 2935873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "9765120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273556.285, "dur": 1.996, + "args": { + "External id": 2935874,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "10289408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273560.951, "dur": 0.498, + "args": { + "External id": 2935875,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "10289664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273562.025, "dur": 0.539, + "args": { + "External id": 2935876,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "11731456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273567.500, "dur": 0.342, + "args": { + "External id": 2935877,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "13173248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273568.658, "dur": 0.240, + "args": { + "External id": 2935878,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14615040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273569.731, "dur": 1.485, + "args": { + "External id": 2935879,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14615296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273574.966, "dur": 0.225, + "args": { + "External id": 2935880,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "15139584"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273575.937, "dur": 0.434, + "args": { + "External id": 2935881,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "15663872"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273576.913, "dur": 1.842, + "args": { + "External id": 2935882,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "16188160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273581.331, "dur": 0.483, + "args": { + "External id": 2935883,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "16712448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273582.484, "dur": 0.310, + "args": { + "External id": 2935884,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16712704"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273587.539, "dur": 0.325, + "args": { + "External id": 2935885,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "18154496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273588.471, "dur": 0.168, + "args": { + "External id": 2935886,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "19596288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273589.379, "dur": 1.499, + "args": { + "External id": 2935887,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21038080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273593.618, "dur": 0.273, + "args": { + "External id": 2935888,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "21038336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273594.622, "dur": 0.173, + "args": { + "External id": 2935889,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "21562624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273595.240, "dur": 2.273, + "args": { + "External id": 2935890,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "22086912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273600.237, "dur": 0.500, + "args": { + "External id": 2935891,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "22611200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273601.196, "dur": 0.363, + "args": { + "External id": 2935892,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "23135488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273605.651, "dur": 0.260, + "args": { + "External id": 2935893,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "23135744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273606.459, "dur": 0.363, + "args": { + "External id": 2935894,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24577536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273607.568, "dur": 1.517, + "args": { + "External id": 2935895,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "26019328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273611.442, "dur": 0.310, + "args": { + "External id": 2935896,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "27461120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273612.667, "dur": 0.460, + "args": { + "External id": 2935897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "27461376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273613.578, "dur": 1.891, + "args": { + "External id": 2935898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "27985664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273618.142, "dur": 0.433, + "args": { + "External id": 2935899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "28509952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273619.143, "dur": 0.299, + "args": { + "External id": 2935900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29034240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273623.750, "dur": 0.253, + "args": { + "External id": 2935901,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "29558528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273624.860, "dur": 0.264, + "args": { + "External id": 2935902,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "29558784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273625.848, "dur": 1.146, + "args": { + "External id": 2935903,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "31000576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273629.574, "dur": 0.167, + "args": { + "External id": 2935904,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "32442368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273630.514, "dur": 0.163, + "args": { + "External id": 2935905,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "33884160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273631.230, "dur": 2.147, + "args": { + "External id": 2935906,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "33884416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015273673.159, "dur": 81.993, + "args": { + "External id": 2935907,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015273823.696, "dur": 295.399, + "args": { + "External id": 2935908,"Record function id": 0, "Concrete Inputs": ["", "", "42076416", "8", "2", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015273840.426, "dur": 4.082, + "args": { + "External id": 2935909,"Record function id": 0, "Concrete Inputs": ["[336611328]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015273851.715, "dur": 12.540, + "args": { + "External id": 2935910,"Record function id": 0, "Concrete Inputs": ["", "0", "84152832", "42076416"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[336611328], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015273856.056, "dur": 7.744, + "args": { + "External id": 2935911,"Record function id": 0, "Concrete Inputs": ["", "0", "84152832", "126229248", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[336611328], [], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273860.701, "dur": 0.613, + "args": { + "External id": 2935912,"Record function id": 0, "Concrete Inputs": ["", "[42076416]", "[1]", "84152832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[336611328], [], [], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015273871.701, "dur": 107.095, + "args": { + "External id": 2935913,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[42076416], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273873.287, "dur": 0.389, + "args": { + "External id": 2935914,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "84152832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273874.977, "dur": 0.342, + "args": { + "External id": 2935915,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "92344832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273878.237, "dur": 1.751, + "args": { + "External id": 2935916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "92345088"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273880.915, "dur": 0.960, + "args": { + "External id": 2935917,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "92869376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273884.393, "dur": 0.240, + "args": { + "External id": 2935918,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "93393664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273885.702, "dur": 0.482, + "args": { + "External id": 2935919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "93917952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273887.011, "dur": 0.284, + "args": { + "External id": 2935920,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "94442240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273889.988, "dur": 0.246, + "args": { + "External id": 2935921,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "94442496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273891.145, "dur": 0.535, + "args": { + "External id": 2935922,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "95884288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273892.407, "dur": 0.332, + "args": { + "External id": 2935923,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "97326080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273895.084, "dur": 1.520, + "args": { + "External id": 2935924,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "98767872"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273897.508, "dur": 0.248, + "args": { + "External id": 2935925,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "98768128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273900.191, "dur": 2.235, + "args": { + "External id": 2935926,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "99292416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273903.444, "dur": 0.450, + "args": { + "External id": 2935927,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "99816704"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273904.475, "dur": 0.324, + "args": { + "External id": 2935928,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100340992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273908.911, "dur": 0.166, + "args": { + "External id": 2935929,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "100865280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273909.538, "dur": 0.241, + "args": { + "External id": 2935930,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "100865536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273910.364, "dur": 0.307, + "args": { + "External id": 2935931,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "102307328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273914.201, "dur": 1.500, + "args": { + "External id": 2935932,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "103749120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273916.657, "dur": 0.138, + "args": { + "External id": 2935933,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "105190912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273919.161, "dur": 1.563, + "args": { + "External id": 2935934,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "105191168"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273921.761, "dur": 0.278, + "args": { + "External id": 2935935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "105715456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273922.612, "dur": 0.179, + "args": { + "External id": 2935936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "106239744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273926.628, "dur": 0.291, + "args": { + "External id": 2935937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "106764032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273927.548, "dur": 0.151, + "args": { + "External id": 2935938,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "107288320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273928.663, "dur": 0.155, + "args": { + "External id": 2935939,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "107288576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273932.868, "dur": 1.495, + "args": { + "External id": 2935940,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "108730368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273935.550, "dur": 0.146, + "args": { + "External id": 2935941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "110172160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273938.295, "dur": 1.761, + "args": { + "External id": 2935942,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "111613952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273941.359, "dur": 0.158, + "args": { + "External id": 2935943,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111614208"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273942.237, "dur": 0.365, + "args": { + "External id": 2935944,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "112138496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273946.919, "dur": 0.174, + "args": { + "External id": 2935945,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "112662784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273947.654, "dur": 0.166, + "args": { + "External id": 2935946,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "113187072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273948.602, "dur": 0.206, + "args": { + "External id": 2935947,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "113711360"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273953.199, "dur": 1.429, + "args": { + "External id": 2935948,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "113711616"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273955.381, "dur": 0.183, + "args": { + "External id": 2935949,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "115153408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273957.930, "dur": 1.824, + "args": { + "External id": 2935950,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "116595200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273960.528, "dur": 0.319, + "args": { + "External id": 2935951,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "118036992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015273961.536, "dur": 0.327, + "args": { + "External id": 2935952,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "118037248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015274038.177, "dur": 63.263, + "args": { + "External id": 2935953,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015274193.517, "dur": 368.972, + "args": { + "External id": 2935954,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[336611328], [42076416], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015274227.934, "dur": 329.850, + "args": { + "External id": 2935955,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 336611328, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[42076416], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5930, "In msg nelems": 42076416 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015274238.674, "dur": 313.552, + "args": { + "External id": 2935956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[42076416]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015274586.425, "dur": 2.206, + "args": { + "External id": 2935957,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5932, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 1336755, "tid": 1336755, + "ts": 1555015274681.064, "dur": 7067.319, + "args": { + "External id": 2935958,"Record function id": 0, "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274897.387, "dur": 6.430, + "args": { + "External id": 2935959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[336611328], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274907.153, "dur": 1.502, + "args": { + "External id": 2935960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274910.583, "dur": 1.046, + "args": { + "External id": 2935961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274913.603, "dur": 2.120, + "args": { + "External id": 2935962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274916.969, "dur": 1.027, + "args": { + "External id": 2935963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274919.333, "dur": 0.990, + "args": { + "External id": 2935964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274923.336, "dur": 1.003, + "args": { + "External id": 2935965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274925.494, "dur": 1.973, + "args": { + "External id": 2935966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274930.404, "dur": 0.813, + "args": { + "External id": 2935967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274932.267, "dur": 0.984, + "args": { + "External id": 2935968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274936.190, "dur": 0.810, + "args": { + "External id": 2935969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274938.374, "dur": 1.546, + "args": { + "External id": 2935970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274941.470, "dur": 0.677, + "args": { + "External id": 2935971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274943.427, "dur": 0.720, + "args": { + "External id": 2935972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274947.026, "dur": 0.701, + "args": { + "External id": 2935973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274949.228, "dur": 2.569, + "args": { + "External id": 2935974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274954.668, "dur": 0.761, + "args": { + "External id": 2935975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274956.711, "dur": 0.811, + "args": { + "External id": 2935976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274960.657, "dur": 0.756, + "args": { + "External id": 2935977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274962.755, "dur": 2.027, + "args": { + "External id": 2935978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274966.018, "dur": 0.836, + "args": { + "External id": 2935979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274967.942, "dur": 0.883, + "args": { + "External id": 2935980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274971.725, "dur": 0.630, + "args": { + "External id": 2935981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015274973.558, "dur": 1.837, + "args": { + "External id": 2935982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275031.023, "dur": 2.001, + "args": { + "External id": 2935983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275036.828, "dur": 0.885, + "args": { + "External id": 2935984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275040.769, "dur": 0.724, + "args": { + "External id": 2935985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275042.782, "dur": 1.931, + "args": { + "External id": 2935986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275046.239, "dur": 0.654, + "args": { + "External id": 2935987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275048.508, "dur": 0.700, + "args": { + "External id": 2935988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275051.930, "dur": 0.652, + "args": { + "External id": 2935989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275053.910, "dur": 2.753, + "args": { + "External id": 2935990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275059.972, "dur": 0.652, + "args": { + "External id": 2935991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275061.942, "dur": 0.794, + "args": { + "External id": 2935992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275065.566, "dur": 0.623, + "args": { + "External id": 2935993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275067.463, "dur": 1.653, + "args": { + "External id": 2935994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275070.392, "dur": 0.623, + "args": { + "External id": 2935995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275072.411, "dur": 1.071, + "args": { + "External id": 2935996,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275075.913, "dur": 0.780, + "args": { + "External id": 2935997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015275078.023, "dur": 2.279, + "args": { + "External id": 2935998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015275110.670, "dur": 6585.391, + "args": { + "External id": 2935999,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[42076416, 1], [], [], []], "Input Dims": [[8, 42076416], [], [], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015275135.567, "dur": 6551.738, + "args": { + "External id": 2936000,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[42076416, 1], [], [], []], "Input Dims": [[8, 42076416], [], [], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015275173.331, "dur": 5.839, + "args": { + "External id": 2936001,"Record function id": 0, "Concrete Inputs": ["[4290]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015275185.808, "dur": 6460.828, + "args": { + "External id": 2936002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4290], [], [], [], [], [], [], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015275188.922, "dur": 6456.983, + "args": { + "External id": 2936003,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4290], [], [], [], [], [], []], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015275195.541, "dur": 7.047, + "args": { + "External id": 2936004,"Record function id": 0, "Concrete Inputs": ["[4290]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015275205.659, "dur": 6437.034, + "args": { + "External id": 2936005,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4290], [4290], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015282066.725, "dur": 38.995, + "args": { + "External id": 2936006,"Record function id": 0, "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 1336755, "tid": 1336755, + "ts": 1555015282107.666, "dur": 241.964, + "args": { + "External id": 2936007,"Record function id": 0, "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015282165.489, "dur": 174.693, + "args": { + "External id": 2936008,"Sequence number": 29294560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[2048, 1], [4096, 1]], "Input Dims": [[32000, 2048], [16, 4096]], "Ev Idx": 5983 + } + }, + { + "ph": "s", "id": 228, "pid": 1336755, "tid": 1336755, "ts": 1555015282165.489, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015282244.132, "dur": 56.269, + "args": { + "External id": 2936009,"kernel_hash": "czrbl3ytmsxdbesfinz7pro3b6utgy4wo4kyhmd4rkzt63qzeqom", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zr/czrbl3ytmsxdbesfinz7pro3b6utgy4wo4kyhmd4rkzt63qzeqom.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096], [32000, 2048], [16, 4096, 2048], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015282413.488, "dur": 56.140, + "args": { + "External id": 2936010,"Record function id": 0, "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 1336755, "tid": 1336755, + "ts": 1555015282480.640, "dur": 7889.843, + "args": { + "External id": 2936011,"Record function id": 0, "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 1336755, "tid": 1336755, + "ts": 1555015282489.076, "dur": 909.708, + "args": { + "External id": 2936012,"Record function id": 0, "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015282565.894, "dur": 10.254, + "args": { + "External id": 2936013,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015282589.178, "dur": 37.721, + "args": { + "External id": 2936014,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282597.384, "dur": 2.187, + "args": { + "External id": 2936015,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282601.214, "dur": 0.234, + "args": { + "External id": 2936016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282602.328, "dur": 1.797, + "args": { + "External id": 2936017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282606.375, "dur": 0.558, + "args": { + "External id": 2936018,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282607.678, "dur": 0.385, + "args": { + "External id": 2936019,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282612.205, "dur": 0.336, + "args": { + "External id": 2936020,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282613.465, "dur": 0.330, + "args": { + "External id": 2936021,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282614.614, "dur": 1.824, + "args": { + "External id": 2936022,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282619.910, "dur": 0.555, + "args": { + "External id": 2936023,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015282636.158, "dur": 41.758, + "args": { + "External id": 2936024,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015282707.860, "dur": 109.769, + "args": { + "External id": 2936025,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015282718.764, "dur": 3.878, + "args": { + "External id": 2936026,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015282727.633, "dur": 11.069, + "args": { + "External id": 2936027,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015282731.613, "dur": 6.686, + "args": { + "External id": 2936028,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282736.516, "dur": 0.576, + "args": { + "External id": 2936029,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015282744.385, "dur": 28.435, + "args": { + "External id": 2936030,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282745.691, "dur": 0.433, + "args": { + "External id": 2936031,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282748.792, "dur": 0.476, + "args": { + "External id": 2936032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282751.671, "dur": 0.465, + "args": { + "External id": 2936033,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282754.967, "dur": 0.393, + "args": { + "External id": 2936034,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282756.181, "dur": 3.299, + "args": { + "External id": 2936035,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282760.215, "dur": 0.419, + "args": { + "External id": 2936036,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282763.737, "dur": 0.319, + "args": { + "External id": 2936037,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282766.453, "dur": 0.203, + "args": { + "External id": 2936038,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015282767.527, "dur": 0.286, + "args": { + "External id": 2936039,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015282785.182, "dur": 23.842, + "args": { + "External id": 2936040,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015282866.913, "dur": 435.190, + "args": { + "External id": 2936041,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015282894.443, "dur": 402.183, + "args": { + "External id": 2936042,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6017, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015282904.036, "dur": 385.027, + "args": { + "External id": 2936043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015283325.778, "dur": 2.489, + "args": { + "External id": 2936044,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6019, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 1336755, "tid": 1336755, + "ts": 1555015283419.226, "dur": 6719.706, + "args": { + "External id": 2936045,"Record function id": 0, "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283521.363, "dur": 6.531, + "args": { + "External id": 2936046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283531.193, "dur": 1.068, + "args": { + "External id": 2936047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283533.908, "dur": 0.946, + "args": { + "External id": 2936048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283536.307, "dur": 2.706, + "args": { + "External id": 2936049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283541.985, "dur": 0.768, + "args": { + "External id": 2936050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283543.870, "dur": 1.124, + "args": { + "External id": 2936051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283546.434, "dur": 1.143, + "args": { + "External id": 2936052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283550.511, "dur": 2.410, + "args": { + "External id": 2936053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283556.208, "dur": 0.794, + "args": { + "External id": 2936054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015283558.351, "dur": 0.696, + "args": { + "External id": 2936055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015283576.169, "dur": 6527.049, + "args": { + "External id": 2936056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015283592.699, "dur": 6504.390, + "args": { + "External id": 2936057,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015283621.857, "dur": 13.372, + "args": { + "External id": 2936058,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015283639.823, "dur": 6424.537, + "args": { + "External id": 2936059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015283644.190, "dur": 6419.630, + "args": { + "External id": 2936060,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015283649.089, "dur": 5.446, + "args": { + "External id": 2936061,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015283656.015, "dur": 6405.258, + "args": { + "External id": 2936062,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015290314.401, "dur": 31.742, + "args": { + "External id": 2936063,"Sequence number": 29294561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6038 + } + }, + { + "ph": "s", "id": 227, "pid": 1336755, "tid": 1336755, "ts": 1555015290314.401, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015290332.157, "dur": 9.644, + "args": { + "External id": 2936064,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015290337.183, "dur": 4.312, + "args": { + "External id": 2936065,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015290410.775, "dur": 123.543, + "args": { + "External id": 2936066,"Record function id": 0, "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015290536.108, "dur": 1195.148, + "args": { + "External id": 2936067,"Record function id": 0, "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015290577.459, "dur": 1140.302, + "args": { + "External id": 2936068,"Sequence number": 29294562, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6043 + } + }, + { + "ph": "s", "id": 226, "pid": 1336755, "tid": 1336755, "ts": 1555015290577.459, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015290653.679, "dur": 45.623, + "args": { + "External id": 2936069,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015290711.650, "dur": 111.055, + "args": { + "External id": 2936070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015290835.024, "dur": 39.006, + "args": { + "External id": 2936071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015290882.048, "dur": 31.711, + "args": { + "External id": 2936072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015290935.699, "dur": 29.261, + "args": { + "External id": 2936073,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015291038.782, "dur": 20.492, + "args": { + "External id": 2936074,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015291084.214, "dur": 170.809, + "args": { + "External id": 2936075,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015291141.256, "dur": 41.541, + "args": { + "External id": 2936076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015291175.773, "dur": 6.059, + "args": { + "External id": 2936077,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015291185.726, "dur": 3.556, + "args": { + "External id": 2936078,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015291192.328, "dur": 1.021, + "args": { + "External id": 2936079,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015291195.817, "dur": 3.027, + "args": { + "External id": 2936080,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015291265.865, "dur": 56.995, + "args": { + "External id": 2936081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015291354.440, "dur": 32.984, + "args": { + "External id": 2936082,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015291394.895, "dur": 44.060, + "args": { + "External id": 2936083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015291447.777, "dur": 36.649, + "args": { + "External id": 2936084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015291509.370, "dur": 28.703, + "args": { + "External id": 2936085,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015291543.343, "dur": 38.736, + "args": { + "External id": 2936086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015291608.979, "dur": 22.006, + "args": { + "External id": 2936087,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 1336755, "tid": 1336755, + "ts": 1555015291793.244, "dur": 84.618, + "args": { + "External id": 2936088,"Record function id": 0, "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015291948.018, "dur": 106.604, + "args": { + "External id": 2936089,"Record function id": 0, "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 1336755, "tid": 1336755, + "ts": 1555015292065.775, "dur": 19283.978, + "args": { + "External id": 2936090,"Record function id": 0, "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 1336755, "tid": 1336755, + "ts": 1555015292077.058, "dur": 871.246, + "args": { + "External id": 2936091,"Record function id": 0, "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015292179.361, "dur": 9.269, + "args": { + "External id": 2936092,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015292201.616, "dur": 38.656, + "args": { + "External id": 2936093,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292208.124, "dur": 3.378, + "args": { + "External id": 2936094,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292213.144, "dur": 0.565, + "args": { + "External id": 2936095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292215.996, "dur": 0.563, + "args": { + "External id": 2936096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292218.913, "dur": 0.550, + "args": { + "External id": 2936097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292220.156, "dur": 1.668, + "args": { + "External id": 2936098,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292224.178, "dur": 0.303, + "args": { + "External id": 2936099,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292226.860, "dur": 0.400, + "args": { + "External id": 2936100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292228.246, "dur": 0.252, + "args": { + "External id": 2936101,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292231.215, "dur": 1.648, + "args": { + "External id": 2936102,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015292250.276, "dur": 45.260, + "args": { + "External id": 2936103,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015292328.296, "dur": 117.106, + "args": { + "External id": 2936104,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015292340.970, "dur": 3.883, + "args": { + "External id": 2936105,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015292350.109, "dur": 9.650, + "args": { + "External id": 2936106,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015292354.396, "dur": 4.977, + "args": { + "External id": 2936107,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292357.493, "dur": 0.679, + "args": { + "External id": 2936108,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015292366.193, "dur": 31.037, + "args": { + "External id": 2936109,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292369.403, "dur": 0.465, + "args": { + "External id": 2936110,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292371.027, "dur": 1.641, + "args": { + "External id": 2936111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292373.309, "dur": 2.659, + "args": { + "External id": 2936112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292378.747, "dur": 0.386, + "args": { + "External id": 2936113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292379.699, "dur": 0.298, + "args": { + "External id": 2936114,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292382.522, "dur": 0.377, + "args": { + "External id": 2936115,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292385.768, "dur": 0.347, + "args": { + "External id": 2936116,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292386.932, "dur": 0.286, + "args": { + "External id": 2936117,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015292391.537, "dur": 0.250, + "args": { + "External id": 2936118,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015292409.268, "dur": 27.161, + "args": { + "External id": 2936119,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015292496.360, "dur": 370.824, + "args": { + "External id": 2936120,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015292526.350, "dur": 336.627, + "args": { + "External id": 2936121,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6096, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015292536.425, "dur": 320.609, + "args": { + "External id": 2936122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015292886.363, "dur": 1.945, + "args": { + "External id": 2936123,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6098, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 1336755, "tid": 1336755, + "ts": 1555015292969.943, "dur": 18163.028, + "args": { + "External id": 2936124,"Record function id": 0, "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293130.768, "dur": 6.320, + "args": { + "External id": 2936125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293140.641, "dur": 33.425, + "args": { + "External id": 2936126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293178.373, "dur": 1.293, + "args": { + "External id": 2936127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293181.150, "dur": 1.014, + "args": { + "External id": 2936128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293183.753, "dur": 1.153, + "args": { + "External id": 2936129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293187.805, "dur": 1.080, + "args": { + "External id": 2936130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293192.331, "dur": 1.273, + "args": { + "External id": 2936131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293194.958, "dur": 4.368, + "args": { + "External id": 2936132,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293200.750, "dur": 0.807, + "args": { + "External id": 2936133,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015293204.243, "dur": 0.814, + "args": { + "External id": 2936134,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015293222.921, "dur": 17870.775, + "args": { + "External id": 2936135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015293268.229, "dur": 17819.300, + "args": { + "External id": 2936136,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015293299.215, "dur": 12.879, + "args": { + "External id": 2936137,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015293315.800, "dur": 17737.164, + "args": { + "External id": 2936138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015293318.382, "dur": 17733.951, + "args": { + "External id": 2936139,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015293346.695, "dur": 7.617, + "args": { + "External id": 2936140,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015293356.901, "dur": 17693.459, + "args": { + "External id": 2936141,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015311294.064, "dur": 31.093, + "args": { + "External id": 2936142,"Sequence number": 29294563, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6117 + } + }, + { + "ph": "s", "id": 225, "pid": 1336755, "tid": 1336755, "ts": 1555015311294.064, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015311311.557, "dur": 8.962, + "args": { + "External id": 2936143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015311315.827, "dur": 4.360, + "args": { + "External id": 2936144,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015311389.526, "dur": 87.332, + "args": { + "External id": 2936145,"Record function id": 0, "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015311478.464, "dur": 1080.664, + "args": { + "External id": 2936146,"Record function id": 0, "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015311519.497, "dur": 1027.311, + "args": { + "External id": 2936147,"Sequence number": 29294564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6122 + } + }, + { + "ph": "s", "id": 224, "pid": 1336755, "tid": 1336755, "ts": 1555015311519.497, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015311594.584, "dur": 44.782, + "args": { + "External id": 2936148,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015311653.224, "dur": 107.505, + "args": { + "External id": 2936149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015311768.701, "dur": 38.232, + "args": { + "External id": 2936150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015311812.105, "dur": 31.644, + "args": { + "External id": 2936151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015311871.379, "dur": 25.384, + "args": { + "External id": 2936152,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015311911.146, "dur": 17.798, + "args": { + "External id": 2936153,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015311942.172, "dur": 170.795, + "args": { + "External id": 2936154,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015312030.863, "dur": 12.849, + "args": { + "External id": 2936155,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312035.447, "dur": 7.343, + "args": { + "External id": 2936156,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015312047.807, "dur": 5.174, + "args": { + "External id": 2936157,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015312054.073, "dur": 2.691, + "args": { + "External id": 2936158,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015312059.065, "dur": 2.303, + "args": { + "External id": 2936159,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015312123.832, "dur": 70.446, + "args": { + "External id": 2936160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015312228.588, "dur": 30.392, + "args": { + "External id": 2936161,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015312267.893, "dur": 44.028, + "args": { + "External id": 2936162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015312318.194, "dur": 35.449, + "args": { + "External id": 2936163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015312376.383, "dur": 25.767, + "args": { + "External id": 2936164,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015312407.644, "dur": 34.944, + "args": { + "External id": 2936165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015312460.908, "dur": 20.691, + "args": { + "External id": 2936166,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 1336755, "tid": 1336755, + "ts": 1555015312622.287, "dur": 78.324, + "args": { + "External id": 2936167,"Record function id": 0, "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015312770.156, "dur": 43.497, + "args": { + "External id": 2936168,"Record function id": 0, "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 1336755, "tid": 1336755, + "ts": 1555015312822.801, "dur": 18665.414, + "args": { + "External id": 2936169,"Record function id": 0, "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 1336755, "tid": 1336755, + "ts": 1555015312831.845, "dur": 905.198, + "args": { + "External id": 2936170,"Record function id": 0, "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015312910.089, "dur": 7.322, + "args": { + "External id": 2936171,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015312930.447, "dur": 34.541, + "args": { + "External id": 2936172,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312935.081, "dur": 2.132, + "args": { + "External id": 2936173,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312940.844, "dur": 0.336, + "args": { + "External id": 2936174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312944.025, "dur": 0.475, + "args": { + "External id": 2936175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312945.540, "dur": 0.499, + "args": { + "External id": 2936176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312948.343, "dur": 0.467, + "args": { + "External id": 2936177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312951.243, "dur": 0.260, + "args": { + "External id": 2936178,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312952.621, "dur": 2.077, + "args": { + "External id": 2936179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312956.857, "dur": 0.414, + "args": { + "External id": 2936180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015312958.003, "dur": 0.159, + "args": { + "External id": 2936181,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015312974.409, "dur": 83.107, + "args": { + "External id": 2936182,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015313094.551, "dur": 138.862, + "args": { + "External id": 2936183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015313107.797, "dur": 4.920, + "args": { + "External id": 2936184,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015313117.796, "dur": 10.135, + "args": { + "External id": 2936185,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015313122.090, "dur": 5.416, + "args": { + "External id": 2936186,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313125.261, "dur": 0.789, + "args": { + "External id": 2936187,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015313134.612, "dur": 49.428, + "args": { + "External id": 2936188,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313138.219, "dur": 2.190, + "args": { + "External id": 2936189,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313141.466, "dur": 15.225, + "args": { + "External id": 2936190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313159.096, "dur": 0.497, + "args": { + "External id": 2936191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313163.615, "dur": 1.533, + "args": { + "External id": 2936192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313165.917, "dur": 0.376, + "args": { + "External id": 2936193,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313168.518, "dur": 0.500, + "args": { + "External id": 2936194,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313171.841, "dur": 0.518, + "args": { + "External id": 2936195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313173.043, "dur": 0.321, + "args": { + "External id": 2936196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313176.424, "dur": 1.641, + "args": { + "External id": 2936197,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015313195.708, "dur": 29.116, + "args": { + "External id": 2936198,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015313288.751, "dur": 359.469, + "args": { + "External id": 2936199,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015313317.971, "dur": 325.961, + "args": { + "External id": 2936200,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6175, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015313328.953, "dur": 309.701, + "args": { + "External id": 2936201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015313670.042, "dur": 2.324, + "args": { + "External id": 2936202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6177, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 1336755, "tid": 1336755, + "ts": 1555015313757.064, "dur": 17542.077, + "args": { + "External id": 2936203,"Record function id": 0, "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313851.716, "dur": 5.296, + "args": { + "External id": 2936204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313860.332, "dur": 1.182, + "args": { + "External id": 2936205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313863.329, "dur": 2.685, + "args": { + "External id": 2936206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313867.841, "dur": 0.910, + "args": { + "External id": 2936207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313870.257, "dur": 0.940, + "args": { + "External id": 2936208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313872.662, "dur": 0.900, + "args": { + "External id": 2936209,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313876.828, "dur": 1.045, + "args": { + "External id": 2936210,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313879.702, "dur": 2.542, + "args": { + "External id": 2936211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313883.441, "dur": 0.781, + "args": { + "External id": 2936212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015313885.573, "dur": 0.875, + "args": { + "External id": 2936213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015313905.293, "dur": 17355.180, + "args": { + "External id": 2936214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015313919.087, "dur": 17334.687, + "args": { + "External id": 2936215,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015313941.206, "dur": 13.025, + "args": { + "External id": 2936216,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015313957.033, "dur": 17263.537, + "args": { + "External id": 2936217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015313959.383, "dur": 17260.602, + "args": { + "External id": 2936218,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015313964.722, "dur": 4.722, + "args": { + "External id": 2936219,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015313971.145, "dur": 17246.019, + "args": { + "External id": 2936220,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015331435.820, "dur": 28.991, + "args": { + "External id": 2936221,"Sequence number": 29294565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6196 + } + }, + { + "ph": "s", "id": 223, "pid": 1336755, "tid": 1336755, "ts": 1555015331435.820, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015331453.282, "dur": 7.034, + "args": { + "External id": 2936222,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015331456.124, "dur": 3.949, + "args": { + "External id": 2936223,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015331526.466, "dur": 78.474, + "args": { + "External id": 2936224,"Record function id": 0, "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015331606.557, "dur": 1118.139, + "args": { + "External id": 2936225,"Record function id": 0, "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015331646.787, "dur": 1065.621, + "args": { + "External id": 2936226,"Sequence number": 29294566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6201 + } + }, + { + "ph": "s", "id": 222, "pid": 1336755, "tid": 1336755, "ts": 1555015331646.787, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015331711.498, "dur": 40.687, + "args": { + "External id": 2936227,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015331765.111, "dur": 107.196, + "args": { + "External id": 2936228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015331881.464, "dur": 39.135, + "args": { + "External id": 2936229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015331929.990, "dur": 32.112, + "args": { + "External id": 2936230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015332043.177, "dur": 28.529, + "args": { + "External id": 2936231,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015332090.540, "dur": 16.576, + "args": { + "External id": 2936232,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015332125.110, "dur": 163.037, + "args": { + "External id": 2936233,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015332205.821, "dur": 13.357, + "args": { + "External id": 2936234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015332212.242, "dur": 5.969, + "args": { + "External id": 2936235,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015332221.562, "dur": 6.642, + "args": { + "External id": 2936236,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015332229.505, "dur": 1.065, + "args": { + "External id": 2936237,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015332232.796, "dur": 3.991, + "args": { + "External id": 2936238,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015332299.222, "dur": 53.367, + "args": { + "External id": 2936239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015332383.341, "dur": 30.702, + "args": { + "External id": 2936240,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015332422.688, "dur": 41.817, + "args": { + "External id": 2936241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015332472.501, "dur": 36.228, + "args": { + "External id": 2936242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015332531.277, "dur": 26.006, + "args": { + "External id": 2936243,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015332562.606, "dur": 34.762, + "args": { + "External id": 2936244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015332618.532, "dur": 21.410, + "args": { + "External id": 2936245,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 1336755, "tid": 1336755, + "ts": 1555015332786.284, "dur": 70.401, + "args": { + "External id": 2936246,"Record function id": 0, "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015332928.557, "dur": 44.034, + "args": { + "External id": 2936247,"Record function id": 0, "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 1336755, "tid": 1336755, + "ts": 1555015333031.296, "dur": 18875.398, + "args": { + "External id": 2936248,"Record function id": 0, "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 1336755, "tid": 1336755, + "ts": 1555015333045.544, "dur": 877.421, + "args": { + "External id": 2936249,"Record function id": 0, "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015333127.030, "dur": 8.835, + "args": { + "External id": 2936250,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015333174.757, "dur": 41.995, + "args": { + "External id": 2936251,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333179.684, "dur": 2.114, + "args": { + "External id": 2936252,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333187.758, "dur": 0.540, + "args": { + "External id": 2936253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333189.139, "dur": 0.321, + "args": { + "External id": 2936254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333194.128, "dur": 0.441, + "args": { + "External id": 2936255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333199.007, "dur": 0.707, + "args": { + "External id": 2936256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333200.633, "dur": 0.427, + "args": { + "External id": 2936257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333203.000, "dur": 3.012, + "args": { + "External id": 2936258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333206.935, "dur": 0.642, + "args": { + "External id": 2936259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333208.422, "dur": 0.256, + "args": { + "External id": 2936260,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015333229.068, "dur": 44.356, + "args": { + "External id": 2936261,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015333306.562, "dur": 121.524, + "args": { + "External id": 2936262,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015333318.799, "dur": 4.644, + "args": { + "External id": 2936263,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015333328.251, "dur": 9.103, + "args": { + "External id": 2936264,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015333332.387, "dur": 4.583, + "args": { + "External id": 2936265,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333335.236, "dur": 0.526, + "args": { + "External id": 2936266,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015333345.541, "dur": 28.618, + "args": { + "External id": 2936267,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333346.907, "dur": 2.318, + "args": { + "External id": 2936268,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333350.244, "dur": 0.322, + "args": { + "External id": 2936269,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333352.857, "dur": 0.739, + "args": { + "External id": 2936270,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333356.154, "dur": 2.181, + "args": { + "External id": 2936271,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333360.510, "dur": 0.395, + "args": { + "External id": 2936272,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333361.612, "dur": 0.465, + "args": { + "External id": 2936273,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333362.936, "dur": 0.425, + "args": { + "External id": 2936274,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333365.426, "dur": 0.449, + "args": { + "External id": 2936275,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015333366.483, "dur": 2.321, + "args": { + "External id": 2936276,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015333393.230, "dur": 26.412, + "args": { + "External id": 2936277,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015333480.672, "dur": 356.358, + "args": { + "External id": 2936278,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015333510.128, "dur": 322.413, + "args": { + "External id": 2936279,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6254, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015333523.371, "dur": 303.684, + "args": { + "External id": 2936280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015333858.098, "dur": 2.150, + "args": { + "External id": 2936281,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6256, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 1336755, "tid": 1336755, + "ts": 1555015333941.514, "dur": 17767.798, + "args": { + "External id": 2936282,"Record function id": 0, "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334078.465, "dur": 6.725, + "args": { + "External id": 2936283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334088.674, "dur": 1.392, + "args": { + "External id": 2936284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334091.779, "dur": 2.971, + "args": { + "External id": 2936285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334096.386, "dur": 1.165, + "args": { + "External id": 2936286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334098.882, "dur": 1.051, + "args": { + "External id": 2936287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334101.170, "dur": 0.850, + "args": { + "External id": 2936288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334104.857, "dur": 0.861, + "args": { + "External id": 2936289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334107.049, "dur": 2.402, + "args": { + "External id": 2936290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334110.911, "dur": 0.864, + "args": { + "External id": 2936291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015334113.087, "dur": 0.910, + "args": { + "External id": 2936292,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015334133.578, "dur": 17537.148, + "args": { + "External id": 2936293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015334170.112, "dur": 17494.411, + "args": { + "External id": 2936294,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015334192.827, "dur": 14.332, + "args": { + "External id": 2936295,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015334210.118, "dur": 17421.319, + "args": { + "External id": 2936296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015334212.605, "dur": 17418.345, + "args": { + "External id": 2936297,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015334219.704, "dur": 6.250, + "args": { + "External id": 2936298,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015334227.668, "dur": 17400.691, + "args": { + "External id": 2936299,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015351841.723, "dur": 41.915, + "args": { + "External id": 2936300,"Sequence number": 29294567, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6275 + } + }, + { + "ph": "s", "id": 221, "pid": 1336755, "tid": 1336755, "ts": 1555015351841.723, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015351872.120, "dur": 7.062, + "args": { + "External id": 2936301,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015351874.956, "dur": 4.012, + "args": { + "External id": 2936302,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015351943.348, "dur": 108.708, + "args": { + "External id": 2936303,"Record function id": 0, "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015352056.736, "dur": 1099.045, + "args": { + "External id": 2936304,"Record function id": 0, "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015352099.282, "dur": 1029.099, + "args": { + "External id": 2936305,"Sequence number": 29294568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6280 + } + }, + { + "ph": "s", "id": 220, "pid": 1336755, "tid": 1336755, "ts": 1555015352099.282, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015352179.697, "dur": 45.279, + "args": { + "External id": 2936306,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352238.101, "dur": 103.742, + "args": { + "External id": 2936307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352355.643, "dur": 39.383, + "args": { + "External id": 2936308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352400.923, "dur": 32.033, + "args": { + "External id": 2936309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015352459.009, "dur": 24.237, + "args": { + "External id": 2936310,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015352498.825, "dur": 15.081, + "args": { + "External id": 2936311,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015352531.857, "dur": 125.370, + "args": { + "External id": 2936312,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015352582.762, "dur": 10.490, + "args": { + "External id": 2936313,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015352587.382, "dur": 5.142, + "args": { + "External id": 2936314,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015352595.519, "dur": 4.611, + "args": { + "External id": 2936315,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015352601.339, "dur": 0.947, + "args": { + "External id": 2936316,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015352606.397, "dur": 4.075, + "args": { + "External id": 2936317,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352667.305, "dur": 46.919, + "args": { + "External id": 2936318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015352743.891, "dur": 27.362, + "args": { + "External id": 2936319,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352780.655, "dur": 42.949, + "args": { + "External id": 2936320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352852.875, "dur": 36.762, + "args": { + "External id": 2936321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015352915.967, "dur": 25.685, + "args": { + "External id": 2936322,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015352947.422, "dur": 72.754, + "args": { + "External id": 2936323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015353042.055, "dur": 21.680, + "args": { + "External id": 2936324,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 1336755, "tid": 1336755, + "ts": 1555015353222.170, "dur": 76.221, + "args": { + "External id": 2936325,"Record function id": 0, "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015353369.820, "dur": 44.822, + "args": { + "External id": 2936326,"Record function id": 0, "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 1336755, "tid": 1336755, + "ts": 1555015353423.670, "dur": 18668.880, + "args": { + "External id": 2936327,"Record function id": 0, "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 1336755, "tid": 1336755, + "ts": 1555015353432.713, "dur": 882.225, + "args": { + "External id": 2936328,"Record function id": 0, "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015353512.494, "dur": 7.726, + "args": { + "External id": 2936329,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015353531.688, "dur": 38.221, + "args": { + "External id": 2936330,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353538.259, "dur": 2.011, + "args": { + "External id": 2936331,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353543.897, "dur": 0.575, + "args": { + "External id": 2936332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353545.251, "dur": 0.389, + "args": { + "External id": 2936333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353547.916, "dur": 0.499, + "args": { + "External id": 2936334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353550.824, "dur": 0.767, + "args": { + "External id": 2936335,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353553.572, "dur": 0.473, + "args": { + "External id": 2936336,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353554.765, "dur": 3.859, + "args": { + "External id": 2936337,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353559.253, "dur": 0.448, + "args": { + "External id": 2936338,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353561.917, "dur": 0.512, + "args": { + "External id": 2936339,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015353579.825, "dur": 43.007, + "args": { + "External id": 2936340,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015353652.637, "dur": 106.235, + "args": { + "External id": 2936341,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015353664.079, "dur": 3.353, + "args": { + "External id": 2936342,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015353672.239, "dur": 11.276, + "args": { + "External id": 2936343,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015353676.342, "dur": 6.776, + "args": { + "External id": 2936344,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353681.182, "dur": 0.718, + "args": { + "External id": 2936345,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015353689.538, "dur": 27.095, + "args": { + "External id": 2936346,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353690.850, "dur": 2.178, + "args": { + "External id": 2936347,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353695.388, "dur": 0.506, + "args": { + "External id": 2936348,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353696.476, "dur": 0.379, + "args": { + "External id": 2936349,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353700.965, "dur": 1.625, + "args": { + "External id": 2936350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353703.260, "dur": 0.232, + "args": { + "External id": 2936351,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353704.362, "dur": 0.350, + "args": { + "External id": 2936352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353706.926, "dur": 0.531, + "args": { + "External id": 2936353,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353708.190, "dur": 0.516, + "args": { + "External id": 2936354,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015353709.443, "dur": 1.954, + "args": { + "External id": 2936355,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015353727.025, "dur": 24.885, + "args": { + "External id": 2936356,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015353807.649, "dur": 413.191, + "args": { + "External id": 2936357,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015353836.706, "dur": 378.687, + "args": { + "External id": 2936358,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6333, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015353847.179, "dur": 361.895, + "args": { + "External id": 2936359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015354244.785, "dur": 2.184, + "args": { + "External id": 2936360,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6335, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 1336755, "tid": 1336755, + "ts": 1555015354334.812, "dur": 17545.538, + "args": { + "External id": 2936361,"Record function id": 0, "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354429.252, "dur": 6.312, + "args": { + "External id": 2936362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354438.683, "dur": 1.141, + "args": { + "External id": 2936363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354441.386, "dur": 2.950, + "args": { + "External id": 2936364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354445.714, "dur": 1.020, + "args": { + "External id": 2936365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354447.978, "dur": 0.809, + "args": { + "External id": 2936366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354449.980, "dur": 0.914, + "args": { + "External id": 2936367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354453.918, "dur": 0.960, + "args": { + "External id": 2936368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354457.592, "dur": 2.317, + "args": { + "External id": 2936369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354461.044, "dur": 0.901, + "args": { + "External id": 2936370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015354463.328, "dur": 1.266, + "args": { + "External id": 2936371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015354482.078, "dur": 17360.261, + "args": { + "External id": 2936372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015354497.585, "dur": 17338.151, + "args": { + "External id": 2936373,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015354518.947, "dur": 14.094, + "args": { + "External id": 2936374,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015354535.934, "dur": 17267.606, + "args": { + "External id": 2936375,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015354538.284, "dur": 17264.633, + "args": { + "External id": 2936376,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015354543.478, "dur": 5.010, + "args": { + "External id": 2936377,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015354550.353, "dur": 17249.649, + "args": { + "External id": 2936378,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015372037.182, "dur": 30.892, + "args": { + "External id": 2936379,"Sequence number": 29294569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6354 + } + }, + { + "ph": "s", "id": 219, "pid": 1336755, "tid": 1336755, "ts": 1555015372037.182, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015372054.354, "dur": 9.238, + "args": { + "External id": 2936380,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015372058.962, "dur": 4.260, + "args": { + "External id": 2936381,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015372130.378, "dur": 90.964, + "args": { + "External id": 2936382,"Record function id": 0, "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015372224.052, "dur": 1063.832, + "args": { + "External id": 2936383,"Record function id": 0, "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015372266.290, "dur": 1008.779, + "args": { + "External id": 2936384,"Sequence number": 29294570, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6359 + } + }, + { + "ph": "s", "id": 218, "pid": 1336755, "tid": 1336755, "ts": 1555015372266.290, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015372331.672, "dur": 43.772, + "args": { + "External id": 2936385,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015372387.055, "dur": 105.222, + "args": { + "External id": 2936386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015372501.545, "dur": 40.047, + "args": { + "External id": 2936387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015372550.332, "dur": 31.488, + "args": { + "External id": 2936388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015372603.944, "dur": 25.016, + "args": { + "External id": 2936389,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015372645.116, "dur": 14.443, + "args": { + "External id": 2936390,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015372677.677, "dur": 121.233, + "args": { + "External id": 2936391,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015372724.756, "dur": 9.870, + "args": { + "External id": 2936392,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015372729.099, "dur": 4.829, + "args": { + "External id": 2936393,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015372736.840, "dur": 5.888, + "args": { + "External id": 2936394,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015372745.142, "dur": 1.069, + "args": { + "External id": 2936395,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015372748.580, "dur": 3.868, + "args": { + "External id": 2936396,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015372809.128, "dur": 45.740, + "args": { + "External id": 2936397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015372883.083, "dur": 28.145, + "args": { + "External id": 2936398,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015372919.936, "dur": 42.455, + "args": { + "External id": 2936399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015372970.009, "dur": 76.223, + "args": { + "External id": 2936400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015373072.272, "dur": 28.197, + "args": { + "External id": 2936401,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015373107.146, "dur": 50.775, + "args": { + "External id": 2936402,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015373179.644, "dur": 21.196, + "args": { + "External id": 2936403,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 1336755, "tid": 1336755, + "ts": 1555015373350.761, "dur": 76.172, + "args": { + "External id": 2936404,"Record function id": 0, "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015373495.543, "dur": 43.612, + "args": { + "External id": 2936405,"Record function id": 0, "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 1336755, "tid": 1336755, + "ts": 1555015373548.133, "dur": 18819.615, + "args": { + "External id": 2936406,"Record function id": 0, "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 1336755, "tid": 1336755, + "ts": 1555015373557.555, "dur": 925.454, + "args": { + "External id": 2936407,"Record function id": 0, "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015373635.697, "dur": 7.601, + "args": { + "External id": 2936408,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015373655.221, "dur": 35.357, + "args": { + "External id": 2936409,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373661.157, "dur": 2.170, + "args": { + "External id": 2936410,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373666.843, "dur": 0.506, + "args": { + "External id": 2936411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373669.225, "dur": 0.655, + "args": { + "External id": 2936412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373670.496, "dur": 0.764, + "args": { + "External id": 2936413,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373673.850, "dur": 0.423, + "args": { + "External id": 2936414,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373676.140, "dur": 0.369, + "args": { + "External id": 2936415,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373677.096, "dur": 2.956, + "args": { + "External id": 2936416,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373680.686, "dur": 0.514, + "args": { + "External id": 2936417,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373683.156, "dur": 0.245, + "args": { + "External id": 2936418,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015373700.620, "dur": 40.141, + "args": { + "External id": 2936419,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015373771.060, "dur": 107.251, + "args": { + "External id": 2936420,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015373783.978, "dur": 3.453, + "args": { + "External id": 2936421,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015373792.492, "dur": 9.387, + "args": { + "External id": 2936422,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015373796.586, "dur": 4.918, + "args": { + "External id": 2936423,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373799.366, "dur": 0.838, + "args": { + "External id": 2936424,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015373808.813, "dur": 25.996, + "args": { + "External id": 2936425,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373811.114, "dur": 1.828, + "args": { + "External id": 2936426,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373814.011, "dur": 0.510, + "args": { + "External id": 2936427,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373815.268, "dur": 0.547, + "args": { + "External id": 2936428,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373819.200, "dur": 1.748, + "args": { + "External id": 2936429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373821.838, "dur": 0.277, + "args": { + "External id": 2936430,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373822.831, "dur": 0.250, + "args": { + "External id": 2936431,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373824.926, "dur": 0.281, + "args": { + "External id": 2936432,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373825.912, "dur": 0.433, + "args": { + "External id": 2936433,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015373827.771, "dur": 1.911, + "args": { + "External id": 2936434,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015373845.968, "dur": 24.635, + "args": { + "External id": 2936435,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015373929.699, "dur": 454.018, + "args": { + "External id": 2936436,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015373959.141, "dur": 418.712, + "args": { + "External id": 2936437,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6412, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015373970.603, "dur": 400.797, + "args": { + "External id": 2936438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015374408.257, "dur": 2.446, + "args": { + "External id": 2936439,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6414, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 1336755, "tid": 1336755, + "ts": 1555015374503.693, "dur": 17673.936, + "args": { + "External id": 2936440,"Record function id": 0, "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374602.054, "dur": 6.370, + "args": { + "External id": 2936441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374611.898, "dur": 1.385, + "args": { + "External id": 2936442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374615.404, "dur": 3.078, + "args": { + "External id": 2936443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374620.314, "dur": 0.813, + "args": { + "External id": 2936444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374622.558, "dur": 0.897, + "args": { + "External id": 2936445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374625.049, "dur": 1.095, + "args": { + "External id": 2936446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374629.333, "dur": 0.745, + "args": { + "External id": 2936447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374631.700, "dur": 2.267, + "args": { + "External id": 2936448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374635.335, "dur": 1.036, + "args": { + "External id": 2936449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015374637.667, "dur": 1.009, + "args": { + "External id": 2936450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015374658.055, "dur": 17462.781, + "args": { + "External id": 2936451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015374678.925, "dur": 17435.225, + "args": { + "External id": 2936452,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015374698.638, "dur": 12.434, + "args": { + "External id": 2936453,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015374713.879, "dur": 17368.003, + "args": { + "External id": 2936454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015374716.369, "dur": 17364.959, + "args": { + "External id": 2936455,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015374722.004, "dur": 4.674, + "args": { + "External id": 2936456,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015374728.434, "dur": 17350.287, + "args": { + "External id": 2936457,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015392315.664, "dur": 28.208, + "args": { + "External id": 2936458,"Sequence number": 29294571, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6433 + } + }, + { + "ph": "s", "id": 217, "pid": 1336755, "tid": 1336755, "ts": 1555015392315.664, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015392331.443, "dur": 7.899, + "args": { + "External id": 2936459,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015392334.932, "dur": 4.131, + "args": { + "External id": 2936460,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015392404.601, "dur": 81.913, + "args": { + "External id": 2936461,"Record function id": 0, "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015392488.014, "dur": 1059.374, + "args": { + "External id": 2936462,"Record function id": 0, "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015392527.820, "dur": 1007.485, + "args": { + "External id": 2936463,"Sequence number": 29294572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6438 + } + }, + { + "ph": "s", "id": 216, "pid": 1336755, "tid": 1336755, "ts": 1555015392527.820, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015392590.105, "dur": 39.788, + "args": { + "External id": 2936464,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015392641.435, "dur": 101.264, + "args": { + "External id": 2936465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015392753.187, "dur": 38.751, + "args": { + "External id": 2936466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015392799.793, "dur": 33.169, + "args": { + "External id": 2936467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015392855.324, "dur": 24.994, + "args": { + "External id": 2936468,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015392896.931, "dur": 17.416, + "args": { + "External id": 2936469,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015392935.942, "dur": 166.615, + "args": { + "External id": 2936470,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015393020.431, "dur": 12.927, + "args": { + "External id": 2936471,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393026.429, "dur": 5.825, + "args": { + "External id": 2936472,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015393036.010, "dur": 4.586, + "args": { + "External id": 2936473,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015393041.993, "dur": 1.128, + "args": { + "External id": 2936474,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015393045.747, "dur": 5.140, + "args": { + "External id": 2936475,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015393113.529, "dur": 66.671, + "args": { + "External id": 2936476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015393214.399, "dur": 34.004, + "args": { + "External id": 2936477,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015393256.814, "dur": 45.989, + "args": { + "External id": 2936478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015393312.106, "dur": 35.482, + "args": { + "External id": 2936479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015393367.698, "dur": 24.955, + "args": { + "External id": 2936480,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015393398.275, "dur": 35.652, + "args": { + "External id": 2936481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015393453.990, "dur": 17.592, + "args": { + "External id": 2936482,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 1336755, "tid": 1336755, + "ts": 1555015393610.948, "dur": 71.200, + "args": { + "External id": 2936483,"Record function id": 0, "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015393752.068, "dur": 44.803, + "args": { + "External id": 2936484,"Record function id": 0, "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 1336755, "tid": 1336755, + "ts": 1555015393806.026, "dur": 17837.706, + "args": { + "External id": 2936485,"Record function id": 0, "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 1336755, "tid": 1336755, + "ts": 1555015393814.572, "dur": 877.776, + "args": { + "External id": 2936486,"Record function id": 0, "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015393890.627, "dur": 6.958, + "args": { + "External id": 2936487,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015393909.499, "dur": 35.334, + "args": { + "External id": 2936488,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393915.057, "dur": 2.297, + "args": { + "External id": 2936489,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393921.247, "dur": 0.606, + "args": { + "External id": 2936490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393923.074, "dur": 0.537, + "args": { + "External id": 2936491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393925.350, "dur": 0.570, + "args": { + "External id": 2936492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393928.525, "dur": 0.399, + "args": { + "External id": 2936493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393930.273, "dur": 0.372, + "args": { + "External id": 2936494,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393932.182, "dur": 2.806, + "args": { + "External id": 2936495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393936.280, "dur": 0.332, + "args": { + "External id": 2936496,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015393937.842, "dur": 0.422, + "args": { + "External id": 2936497,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015393954.231, "dur": 80.869, + "args": { + "External id": 2936498,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015394071.743, "dur": 135.037, + "args": { + "External id": 2936499,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015394084.344, "dur": 5.549, + "args": { + "External id": 2936500,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015394094.933, "dur": 10.643, + "args": { + "External id": 2936501,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015394098.858, "dur": 6.328, + "args": { + "External id": 2936502,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394102.544, "dur": 0.877, + "args": { + "External id": 2936503,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015394112.545, "dur": 44.145, + "args": { + "External id": 2936504,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394114.750, "dur": 2.187, + "args": { + "External id": 2936505,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394118.258, "dur": 0.591, + "args": { + "External id": 2936506,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394120.304, "dur": 0.400, + "args": { + "External id": 2936507,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394123.919, "dur": 2.272, + "args": { + "External id": 2936508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394127.441, "dur": 0.345, + "args": { + "External id": 2936509,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394129.190, "dur": 0.371, + "args": { + "External id": 2936510,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394131.192, "dur": 0.381, + "args": { + "External id": 2936511,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394133.142, "dur": 0.434, + "args": { + "External id": 2936512,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394135.024, "dur": 1.888, + "args": { + "External id": 2936513,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015394169.781, "dur": 28.762, + "args": { + "External id": 2936514,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015394260.543, "dur": 348.161, + "args": { + "External id": 2936515,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015394291.095, "dur": 313.101, + "args": { + "External id": 2936516,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6491, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015394302.107, "dur": 296.626, + "args": { + "External id": 2936517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015394629.988, "dur": 2.115, + "args": { + "External id": 2936518,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6493, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 1336755, "tid": 1336755, + "ts": 1555015394711.232, "dur": 16750.438, + "args": { + "External id": 2936519,"Record function id": 0, "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394800.996, "dur": 5.584, + "args": { + "External id": 2936520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394810.142, "dur": 1.127, + "args": { + "External id": 2936521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394812.828, "dur": 2.548, + "args": { + "External id": 2936522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394817.556, "dur": 1.153, + "args": { + "External id": 2936523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394820.224, "dur": 1.331, + "args": { + "External id": 2936524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394822.893, "dur": 1.471, + "args": { + "External id": 2936525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394827.730, "dur": 1.290, + "args": { + "External id": 2936526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394830.439, "dur": 2.539, + "args": { + "External id": 2936527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394834.481, "dur": 1.155, + "args": { + "External id": 2936528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015394836.916, "dur": 0.788, + "args": { + "External id": 2936529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015394855.702, "dur": 16562.597, + "args": { + "External id": 2936530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015394870.074, "dur": 16541.755, + "args": { + "External id": 2936531,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015394891.499, "dur": 14.167, + "args": { + "External id": 2936532,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015394908.478, "dur": 16469.559, + "args": { + "External id": 2936533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015394910.945, "dur": 16466.560, + "args": { + "External id": 2936534,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015394915.978, "dur": 7.398, + "args": { + "External id": 2936535,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015394924.913, "dur": 16449.818, + "args": { + "External id": 2936536,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015411592.977, "dur": 27.802, + "args": { + "External id": 2936537,"Sequence number": 29294573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6512 + } + }, + { + "ph": "s", "id": 215, "pid": 1336755, "tid": 1336755, "ts": 1555015411592.977, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015411608.884, "dur": 7.338, + "args": { + "External id": 2936538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015411612.009, "dur": 4.004, + "args": { + "External id": 2936539,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015411680.897, "dur": 77.366, + "args": { + "External id": 2936540,"Record function id": 0, "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015411759.984, "dur": 1048.683, + "args": { + "External id": 2936541,"Record function id": 0, "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015411800.116, "dur": 996.245, + "args": { + "External id": 2936542,"Sequence number": 29294574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6517 + } + }, + { + "ph": "s", "id": 214, "pid": 1336755, "tid": 1336755, "ts": 1555015411800.116, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015411861.273, "dur": 41.014, + "args": { + "External id": 2936543,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015411914.239, "dur": 133.848, + "args": { + "External id": 2936544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015412063.631, "dur": 44.562, + "args": { + "External id": 2936545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015412116.149, "dur": 43.458, + "args": { + "External id": 2936546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015412188.257, "dur": 26.875, + "args": { + "External id": 2936547,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015412232.790, "dur": 16.058, + "args": { + "External id": 2936548,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015412268.376, "dur": 124.768, + "args": { + "External id": 2936549,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015412317.964, "dur": 11.418, + "args": { + "External id": 2936550,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015412322.461, "dur": 6.186, + "args": { + "External id": 2936551,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015412331.855, "dur": 5.390, + "args": { + "External id": 2936552,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015412338.491, "dur": 0.972, + "args": { + "External id": 2936553,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015412342.032, "dur": 4.048, + "args": { + "External id": 2936554,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015412402.992, "dur": 50.672, + "args": { + "External id": 2936555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015412483.331, "dur": 30.583, + "args": { + "External id": 2936556,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015412522.217, "dur": 41.718, + "args": { + "External id": 2936557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015412571.766, "dur": 36.206, + "args": { + "External id": 2936558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015412629.370, "dur": 24.840, + "args": { + "External id": 2936559,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015412659.609, "dur": 34.569, + "args": { + "External id": 2936560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015412712.398, "dur": 17.313, + "args": { + "External id": 2936561,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 1336755, "tid": 1336755, + "ts": 1555015412871.841, "dur": 76.631, + "args": { + "External id": 2936562,"Record function id": 0, "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015413062.921, "dur": 50.382, + "args": { + "External id": 2936563,"Record function id": 0, "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 1336755, "tid": 1336755, + "ts": 1555015413122.855, "dur": 18150.180, + "args": { + "External id": 2936564,"Record function id": 0, "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 1336755, "tid": 1336755, + "ts": 1555015413130.740, "dur": 830.315, + "args": { + "External id": 2936565,"Record function id": 0, "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015413224.982, "dur": 8.778, + "args": { + "External id": 2936566,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015413246.698, "dur": 36.996, + "args": { + "External id": 2936567,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413251.775, "dur": 2.156, + "args": { + "External id": 2936568,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413257.802, "dur": 0.414, + "args": { + "External id": 2936569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413259.539, "dur": 0.561, + "args": { + "External id": 2936570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413262.307, "dur": 0.658, + "args": { + "External id": 2936571,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413266.139, "dur": 0.451, + "args": { + "External id": 2936572,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413267.714, "dur": 0.524, + "args": { + "External id": 2936573,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413269.536, "dur": 3.416, + "args": { + "External id": 2936574,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413274.196, "dur": 0.245, + "args": { + "External id": 2936575,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413276.240, "dur": 0.315, + "args": { + "External id": 2936576,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015413293.866, "dur": 41.706, + "args": { + "External id": 2936577,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015413369.280, "dur": 108.771, + "args": { + "External id": 2936578,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015413381.226, "dur": 3.605, + "args": { + "External id": 2936579,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015413389.548, "dur": 9.742, + "args": { + "External id": 2936580,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015413393.610, "dur": 5.258, + "args": { + "External id": 2936581,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413397.147, "dur": 0.538, + "args": { + "External id": 2936582,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015413406.065, "dur": 28.587, + "args": { + "External id": 2936583,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413408.235, "dur": 2.123, + "args": { + "External id": 2936584,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413411.912, "dur": 0.419, + "args": { + "External id": 2936585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413413.422, "dur": 0.418, + "args": { + "External id": 2936586,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413417.112, "dur": 1.946, + "args": { + "External id": 2936587,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413420.631, "dur": 0.621, + "args": { + "External id": 2936588,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413422.448, "dur": 0.558, + "args": { + "External id": 2936589,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413424.330, "dur": 0.425, + "args": { + "External id": 2936590,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413425.879, "dur": 0.504, + "args": { + "External id": 2936591,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015413427.762, "dur": 1.915, + "args": { + "External id": 2936592,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015413445.634, "dur": 24.873, + "args": { + "External id": 2936593,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015413528.861, "dur": 349.782, + "args": { + "External id": 2936594,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015413557.383, "dur": 316.479, + "args": { + "External id": 2936595,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6570, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015413567.961, "dur": 300.125, + "args": { + "External id": 2936596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015413899.035, "dur": 2.138, + "args": { + "External id": 2936597,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6572, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 1336755, "tid": 1336755, + "ts": 1555015413979.731, "dur": 17087.689, + "args": { + "External id": 2936598,"Record function id": 0, "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414123.722, "dur": 6.513, + "args": { + "External id": 2936599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414134.242, "dur": 1.430, + "args": { + "External id": 2936600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414137.385, "dur": 2.837, + "args": { + "External id": 2936601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414157.485, "dur": 1.435, + "args": { + "External id": 2936602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414162.196, "dur": 1.318, + "args": { + "External id": 2936603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414165.251, "dur": 0.825, + "args": { + "External id": 2936604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414167.803, "dur": 0.910, + "args": { + "External id": 2936605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414171.640, "dur": 2.245, + "args": { + "External id": 2936606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414175.227, "dur": 0.876, + "args": { + "External id": 2936607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015414177.553, "dur": 0.878, + "args": { + "External id": 2936608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015414198.091, "dur": 16825.677, + "args": { + "External id": 2936609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015414213.455, "dur": 16803.617, + "args": { + "External id": 2936610,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015414233.109, "dur": 14.236, + "args": { + "External id": 2936611,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015414251.886, "dur": 16712.077, + "args": { + "External id": 2936612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015414254.474, "dur": 16708.970, + "args": { + "External id": 2936613,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015414259.846, "dur": 4.675, + "args": { + "External id": 2936614,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015414266.213, "dur": 16694.183, + "args": { + "External id": 2936615,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015431217.535, "dur": 29.628, + "args": { + "External id": 2936616,"Sequence number": 29294575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6591 + } + }, + { + "ph": "s", "id": 213, "pid": 1336755, "tid": 1336755, "ts": 1555015431217.535, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015431235.317, "dur": 7.286, + "args": { + "External id": 2936617,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015431238.117, "dur": 4.103, + "args": { + "External id": 2936618,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015431309.806, "dur": 78.361, + "args": { + "External id": 2936619,"Record function id": 0, "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015431390.275, "dur": 1053.497, + "args": { + "External id": 2936620,"Record function id": 0, "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015431431.188, "dur": 999.645, + "args": { + "External id": 2936621,"Sequence number": 29294576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6596 + } + }, + { + "ph": "s", "id": 212, "pid": 1336755, "tid": 1336755, "ts": 1555015431431.188, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015431494.889, "dur": 41.670, + "args": { + "External id": 2936622,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015431548.772, "dur": 103.081, + "args": { + "External id": 2936623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015431661.472, "dur": 38.356, + "args": { + "External id": 2936624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015431709.020, "dur": 32.610, + "args": { + "External id": 2936625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015431763.711, "dur": 23.982, + "args": { + "External id": 2936626,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015431803.379, "dur": 14.620, + "args": { + "External id": 2936627,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015431835.181, "dur": 126.070, + "args": { + "External id": 2936628,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015431881.539, "dur": 10.318, + "args": { + "External id": 2936629,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015431885.899, "dur": 5.163, + "args": { + "External id": 2936630,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015431894.625, "dur": 6.073, + "args": { + "External id": 2936631,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015431907.690, "dur": 1.095, + "args": { + "External id": 2936632,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015431911.101, "dur": 3.396, + "args": { + "External id": 2936633,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015431970.938, "dur": 90.192, + "args": { + "External id": 2936634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015432093.862, "dur": 27.462, + "args": { + "External id": 2936635,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015432130.248, "dur": 59.402, + "args": { + "External id": 2936636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015432199.816, "dur": 37.421, + "args": { + "External id": 2936637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015432259.227, "dur": 27.622, + "args": { + "External id": 2936638,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015432292.516, "dur": 34.625, + "args": { + "External id": 2936639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015432346.246, "dur": 17.625, + "args": { + "External id": 2936640,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 1336755, "tid": 1336755, + "ts": 1555015432507.133, "dur": 71.786, + "args": { + "External id": 2936641,"Record function id": 0, "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015432648.284, "dur": 42.207, + "args": { + "External id": 2936642,"Record function id": 0, "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 1336755, "tid": 1336755, + "ts": 1555015432699.257, "dur": 18092.878, + "args": { + "External id": 2936643,"Record function id": 0, "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 1336755, "tid": 1336755, + "ts": 1555015432708.035, "dur": 875.395, + "args": { + "External id": 2936644,"Record function id": 0, "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015432784.033, "dur": 6.640, + "args": { + "External id": 2936645,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015432802.250, "dur": 36.596, + "args": { + "External id": 2936646,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432807.176, "dur": 2.100, + "args": { + "External id": 2936647,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432813.444, "dur": 0.306, + "args": { + "External id": 2936648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432815.226, "dur": 0.703, + "args": { + "External id": 2936649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432817.137, "dur": 0.553, + "args": { + "External id": 2936650,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432820.594, "dur": 0.560, + "args": { + "External id": 2936651,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432822.468, "dur": 0.852, + "args": { + "External id": 2936652,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432824.912, "dur": 3.204, + "args": { + "External id": 2936653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432829.707, "dur": 0.483, + "args": { + "External id": 2936654,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432831.250, "dur": 0.400, + "args": { + "External id": 2936655,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015432848.694, "dur": 38.772, + "args": { + "External id": 2936656,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015432919.410, "dur": 155.893, + "args": { + "External id": 2936657,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015432930.954, "dur": 3.295, + "args": { + "External id": 2936658,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015432939.145, "dur": 10.337, + "args": { + "External id": 2936659,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015432943.086, "dur": 5.943, + "args": { + "External id": 2936660,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432946.868, "dur": 0.714, + "args": { + "External id": 2936661,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015432955.455, "dur": 68.380, + "args": { + "External id": 2936662,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432957.139, "dur": 2.378, + "args": { + "External id": 2936663,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432960.823, "dur": 0.486, + "args": { + "External id": 2936664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432962.628, "dur": 0.451, + "args": { + "External id": 2936665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432966.574, "dur": 1.937, + "args": { + "External id": 2936666,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432969.649, "dur": 0.419, + "args": { + "External id": 2936667,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432971.153, "dur": 0.399, + "args": { + "External id": 2936668,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432974.368, "dur": 0.426, + "args": { + "External id": 2936669,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432976.085, "dur": 0.405, + "args": { + "External id": 2936670,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015432977.544, "dur": 1.793, + "args": { + "External id": 2936671,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015433037.201, "dur": 29.115, + "args": { + "External id": 2936672,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015433127.295, "dur": 369.193, + "args": { + "External id": 2936673,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015433171.469, "dur": 320.413, + "args": { + "External id": 2936674,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6649, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015433184.209, "dur": 299.775, + "args": { + "External id": 2936675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015433517.693, "dur": 2.168, + "args": { + "External id": 2936676,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6651, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 1336755, "tid": 1336755, + "ts": 1555015433602.835, "dur": 17006.191, + "args": { + "External id": 2936677,"Record function id": 0, "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433694.867, "dur": 5.930, + "args": { + "External id": 2936678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433704.628, "dur": 1.304, + "args": { + "External id": 2936679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433707.744, "dur": 2.360, + "args": { + "External id": 2936680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433711.797, "dur": 0.959, + "args": { + "External id": 2936681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433713.957, "dur": 1.034, + "args": { + "External id": 2936682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433716.260, "dur": 1.178, + "args": { + "External id": 2936683,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433720.271, "dur": 1.215, + "args": { + "External id": 2936684,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433722.783, "dur": 2.978, + "args": { + "External id": 2936685,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433727.018, "dur": 1.141, + "args": { + "External id": 2936686,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015433729.470, "dur": 0.977, + "args": { + "External id": 2936687,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015433757.521, "dur": 16807.441, + "args": { + "External id": 2936688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015433772.610, "dur": 16785.803, + "args": { + "External id": 2936689,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015433790.733, "dur": 13.146, + "args": { + "External id": 2936690,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015433806.746, "dur": 16720.404, + "args": { + "External id": 2936691,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015433808.986, "dur": 16717.471, + "args": { + "External id": 2936692,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015433814.159, "dur": 4.565, + "args": { + "External id": 2936693,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015433820.444, "dur": 16703.266, + "args": { + "External id": 2936694,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015450741.612, "dur": 27.392, + "args": { + "External id": 2936695,"Sequence number": 29294577, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6670 + } + }, + { + "ph": "s", "id": 211, "pid": 1336755, "tid": 1336755, "ts": 1555015450741.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015450757.302, "dur": 7.234, + "args": { + "External id": 2936696,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015450760.200, "dur": 4.129, + "args": { + "External id": 2936697,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015450830.035, "dur": 78.542, + "args": { + "External id": 2936698,"Record function id": 0, "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015450909.951, "dur": 1032.151, + "args": { + "External id": 2936699,"Record function id": 0, "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015450946.865, "dur": 982.732, + "args": { + "External id": 2936700,"Sequence number": 29294578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6675 + } + }, + { + "ph": "s", "id": 210, "pid": 1336755, "tid": 1336755, "ts": 1555015450946.865, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015451038.084, "dur": 43.975, + "args": { + "External id": 2936701,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451095.473, "dur": 119.539, + "args": { + "External id": 2936702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451229.578, "dur": 41.431, + "args": { + "External id": 2936703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451279.491, "dur": 31.345, + "args": { + "External id": 2936704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015451335.086, "dur": 26.264, + "args": { + "External id": 2936705,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015451380.091, "dur": 15.001, + "args": { + "External id": 2936706,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015451413.192, "dur": 122.626, + "args": { + "External id": 2936707,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015451460.996, "dur": 10.564, + "args": { + "External id": 2936708,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015451465.769, "dur": 5.047, + "args": { + "External id": 2936709,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015451474.187, "dur": 4.999, + "args": { + "External id": 2936710,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015451480.500, "dur": 1.298, + "args": { + "External id": 2936711,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015451483.947, "dur": 3.724, + "args": { + "External id": 2936712,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451545.465, "dur": 45.146, + "args": { + "External id": 2936713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015451618.187, "dur": 26.956, + "args": { + "External id": 2936714,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451654.317, "dur": 42.311, + "args": { + "External id": 2936715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451704.113, "dur": 34.966, + "args": { + "External id": 2936716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015451761.491, "dur": 25.854, + "args": { + "External id": 2936717,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015451792.503, "dur": 34.526, + "args": { + "External id": 2936718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015451844.712, "dur": 17.859, + "args": { + "External id": 2936719,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 1336755, "tid": 1336755, + "ts": 1555015452042.717, "dur": 74.279, + "args": { + "External id": 2936720,"Record function id": 0, "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015452206.597, "dur": 46.824, + "args": { + "External id": 2936721,"Record function id": 0, "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 1336755, "tid": 1336755, + "ts": 1555015452262.781, "dur": 18281.628, + "args": { + "External id": 2936722,"Record function id": 0, "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 1336755, "tid": 1336755, + "ts": 1555015452271.306, "dur": 869.953, + "args": { + "External id": 2936723,"Record function id": 0, "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015452348.849, "dur": 8.319, + "args": { + "External id": 2936724,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015452369.515, "dur": 34.837, + "args": { + "External id": 2936725,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452374.334, "dur": 2.009, + "args": { + "External id": 2936726,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452380.131, "dur": 0.558, + "args": { + "External id": 2936727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452381.860, "dur": 0.434, + "args": { + "External id": 2936728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452383.284, "dur": 0.534, + "args": { + "External id": 2936729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452386.809, "dur": 0.650, + "args": { + "External id": 2936730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452388.512, "dur": 0.494, + "args": { + "External id": 2936731,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452389.834, "dur": 3.672, + "args": { + "External id": 2936732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452394.453, "dur": 0.276, + "args": { + "External id": 2936733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452395.597, "dur": 0.378, + "args": { + "External id": 2936734,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015452413.886, "dur": 44.682, + "args": { + "External id": 2936735,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015452489.855, "dur": 118.512, + "args": { + "External id": 2936736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015452502.320, "dur": 3.613, + "args": { + "External id": 2936737,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015452510.956, "dur": 9.859, + "args": { + "External id": 2936738,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015452515.413, "dur": 5.029, + "args": { + "External id": 2936739,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452518.735, "dur": 0.492, + "args": { + "External id": 2936740,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015452527.378, "dur": 30.897, + "args": { + "External id": 2936741,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452529.307, "dur": 1.993, + "args": { + "External id": 2936742,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452535.427, "dur": 0.505, + "args": { + "External id": 2936743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452537.017, "dur": 0.354, + "args": { + "External id": 2936744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452539.846, "dur": 2.334, + "args": { + "External id": 2936745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452543.267, "dur": 0.494, + "args": { + "External id": 2936746,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452544.873, "dur": 1.766, + "args": { + "External id": 2936747,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452547.766, "dur": 0.439, + "args": { + "External id": 2936748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452549.060, "dur": 0.246, + "args": { + "External id": 2936749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015452551.822, "dur": 0.304, + "args": { + "External id": 2936750,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015452574.375, "dur": 25.374, + "args": { + "External id": 2936751,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015452659.797, "dur": 389.514, + "args": { + "External id": 2936752,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015452689.116, "dur": 354.861, + "args": { + "External id": 2936753,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6728, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015452700.272, "dur": 337.382, + "args": { + "External id": 2936754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015453072.972, "dur": 2.713, + "args": { + "External id": 2936755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6730, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 1336755, "tid": 1336755, + "ts": 1555015453179.348, "dur": 17186.355, + "args": { + "External id": 2936756,"Record function id": 0, "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453274.905, "dur": 6.302, + "args": { + "External id": 2936757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453285.267, "dur": 1.428, + "args": { + "External id": 2936758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453288.330, "dur": 2.817, + "args": { + "External id": 2936759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453292.849, "dur": 1.481, + "args": { + "External id": 2936760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453295.600, "dur": 1.263, + "args": { + "External id": 2936761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453297.982, "dur": 1.142, + "args": { + "External id": 2936762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453302.273, "dur": 1.035, + "args": { + "External id": 2936763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453304.618, "dur": 3.004, + "args": { + "External id": 2936764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453308.753, "dur": 0.799, + "args": { + "External id": 2936765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015453310.720, "dur": 0.729, + "args": { + "External id": 2936766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015453330.021, "dur": 16992.137, + "args": { + "External id": 2936767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015453345.459, "dur": 16970.187, + "args": { + "External id": 2936768,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015453366.068, "dur": 14.175, + "args": { + "External id": 2936769,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015453383.169, "dur": 16900.707, + "args": { + "External id": 2936770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015453385.553, "dur": 16897.789, + "args": { + "External id": 2936771,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015453390.455, "dur": 6.331, + "args": { + "External id": 2936772,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015453398.540, "dur": 16881.862, + "args": { + "External id": 2936773,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015470493.387, "dur": 27.092, + "args": { + "External id": 2936774,"Sequence number": 29294579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6749 + } + }, + { + "ph": "s", "id": 209, "pid": 1336755, "tid": 1336755, "ts": 1555015470493.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015470508.671, "dur": 7.307, + "args": { + "External id": 2936775,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015470511.730, "dur": 3.998, + "args": { + "External id": 2936776,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015470581.217, "dur": 76.146, + "args": { + "External id": 2936777,"Record function id": 0, "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015470659.018, "dur": 1057.768, + "args": { + "External id": 2936778,"Record function id": 0, "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015470697.223, "dur": 1007.322, + "args": { + "External id": 2936779,"Sequence number": 29294580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6754 + } + }, + { + "ph": "s", "id": 208, "pid": 1336755, "tid": 1336755, "ts": 1555015470697.223, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015470759.369, "dur": 40.623, + "args": { + "External id": 2936780,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015470811.212, "dur": 102.886, + "args": { + "External id": 2936781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015470923.173, "dur": 39.372, + "args": { + "External id": 2936782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015470969.813, "dur": 74.544, + "args": { + "External id": 2936783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015471073.676, "dur": 28.598, + "args": { + "External id": 2936784,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015471119.823, "dur": 14.820, + "args": { + "External id": 2936785,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015471167.906, "dur": 131.644, + "args": { + "External id": 2936786,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015471216.792, "dur": 11.821, + "args": { + "External id": 2936787,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015471221.629, "dur": 6.077, + "args": { + "External id": 2936788,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015471231.306, "dur": 10.109, + "args": { + "External id": 2936789,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015471242.576, "dur": 1.146, + "args": { + "External id": 2936790,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015471246.030, "dur": 4.511, + "args": { + "External id": 2936791,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015471310.490, "dur": 53.907, + "args": { + "External id": 2936792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015471391.331, "dur": 29.448, + "args": { + "External id": 2936793,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015471435.829, "dur": 42.994, + "args": { + "External id": 2936794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015471486.460, "dur": 35.257, + "args": { + "External id": 2936795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015471543.528, "dur": 26.291, + "args": { + "External id": 2936796,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015471574.995, "dur": 34.188, + "args": { + "External id": 2936797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015471626.922, "dur": 16.453, + "args": { + "External id": 2936798,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 1336755, "tid": 1336755, + "ts": 1555015471778.973, "dur": 73.483, + "args": { + "External id": 2936799,"Record function id": 0, "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015471922.918, "dur": 47.445, + "args": { + "External id": 2936800,"Record function id": 0, "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 1336755, "tid": 1336755, + "ts": 1555015471979.032, "dur": 18336.020, + "args": { + "External id": 2936801,"Record function id": 0, "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 1336755, "tid": 1336755, + "ts": 1555015472029.189, "dur": 873.136, + "args": { + "External id": 2936802,"Record function id": 0, "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015472109.654, "dur": 8.534, + "args": { + "External id": 2936803,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015472130.924, "dur": 51.675, + "args": { + "External id": 2936804,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472135.587, "dur": 2.198, + "args": { + "External id": 2936805,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472141.560, "dur": 14.368, + "args": { + "External id": 2936806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472158.544, "dur": 0.468, + "args": { + "External id": 2936807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472160.107, "dur": 0.612, + "args": { + "External id": 2936808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472163.490, "dur": 0.539, + "args": { + "External id": 2936809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472165.655, "dur": 0.852, + "args": { + "External id": 2936810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472167.770, "dur": 3.264, + "args": { + "External id": 2936811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472172.446, "dur": 0.589, + "args": { + "External id": 2936812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472174.344, "dur": 0.340, + "args": { + "External id": 2936813,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015472194.709, "dur": 42.648, + "args": { + "External id": 2936814,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015472271.014, "dur": 113.098, + "args": { + "External id": 2936815,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015472284.160, "dur": 4.547, + "args": { + "External id": 2936816,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015472293.755, "dur": 10.325, + "args": { + "External id": 2936817,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015472298.045, "dur": 5.613, + "args": { + "External id": 2936818,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472301.498, "dur": 0.811, + "args": { + "External id": 2936819,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015472311.575, "dur": 29.644, + "args": { + "External id": 2936820,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472313.699, "dur": 1.871, + "args": { + "External id": 2936821,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472316.989, "dur": 0.520, + "args": { + "External id": 2936822,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472319.392, "dur": 0.403, + "args": { + "External id": 2936823,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472323.222, "dur": 1.836, + "args": { + "External id": 2936824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472326.335, "dur": 0.257, + "args": { + "External id": 2936825,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472327.647, "dur": 0.334, + "args": { + "External id": 2936826,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472330.997, "dur": 0.358, + "args": { + "External id": 2936827,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472332.690, "dur": 0.248, + "args": { + "External id": 2936828,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015472334.380, "dur": 1.993, + "args": { + "External id": 2936829,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015472351.648, "dur": 24.090, + "args": { + "External id": 2936830,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015472435.184, "dur": 380.640, + "args": { + "External id": 2936831,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015472465.486, "dur": 345.771, + "args": { + "External id": 2936832,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6807, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015472502.330, "dur": 303.646, + "args": { + "External id": 2936833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015472837.325, "dur": 2.275, + "args": { + "External id": 2936834,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6809, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 1336755, "tid": 1336755, + "ts": 1555015472921.537, "dur": 17187.113, + "args": { + "External id": 2936835,"Record function id": 0, "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473060.591, "dur": 6.334, + "args": { + "External id": 2936836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473070.584, "dur": 1.449, + "args": { + "External id": 2936837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473073.757, "dur": 3.169, + "args": { + "External id": 2936838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473079.009, "dur": 1.044, + "args": { + "External id": 2936839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473081.279, "dur": 1.000, + "args": { + "External id": 2936840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473084.189, "dur": 1.060, + "args": { + "External id": 2936841,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473088.095, "dur": 1.308, + "args": { + "External id": 2936842,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473090.896, "dur": 2.559, + "args": { + "External id": 2936843,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473094.880, "dur": 1.036, + "args": { + "External id": 2936844,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015473097.395, "dur": 0.905, + "args": { + "External id": 2936845,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015473117.870, "dur": 16953.864, + "args": { + "External id": 2936846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015473134.222, "dur": 16931.213, + "args": { + "External id": 2936847,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015473168.025, "dur": 14.140, + "args": { + "External id": 2936848,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015473185.441, "dur": 16847.545, + "args": { + "External id": 2936849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015473188.172, "dur": 16844.325, + "args": { + "External id": 2936850,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015473194.104, "dur": 7.391, + "args": { + "External id": 2936851,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015473203.370, "dur": 16826.205, + "args": { + "External id": 2936852,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015490255.368, "dur": 34.363, + "args": { + "External id": 2936853,"Sequence number": 29294581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6828 + } + }, + { + "ph": "s", "id": 207, "pid": 1336755, "tid": 1336755, "ts": 1555015490255.368, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015490277.862, "dur": 7.270, + "args": { + "External id": 2936854,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015490280.873, "dur": 3.923, + "args": { + "External id": 2936855,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015490353.434, "dur": 77.465, + "args": { + "External id": 2936856,"Record function id": 0, "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015490432.805, "dur": 1049.578, + "args": { + "External id": 2936857,"Record function id": 0, "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015490473.169, "dur": 996.633, + "args": { + "External id": 2936858,"Sequence number": 29294582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6833 + } + }, + { + "ph": "s", "id": 206, "pid": 1336755, "tid": 1336755, "ts": 1555015490473.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015490533.389, "dur": 42.260, + "args": { + "External id": 2936859,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015490588.242, "dur": 102.056, + "args": { + "External id": 2936860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015490700.478, "dur": 39.302, + "args": { + "External id": 2936861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015490747.103, "dur": 31.404, + "args": { + "External id": 2936862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015490802.345, "dur": 22.700, + "args": { + "External id": 2936863,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015490841.052, "dur": 13.986, + "args": { + "External id": 2936864,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015490872.433, "dur": 161.842, + "args": { + "External id": 2936865,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015490917.299, "dur": 10.327, + "args": { + "External id": 2936866,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015490921.661, "dur": 5.185, + "args": { + "External id": 2936867,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015490930.408, "dur": 5.660, + "args": { + "External id": 2936868,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015490937.443, "dur": 1.123, + "args": { + "External id": 2936869,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015490940.955, "dur": 4.720, + "args": { + "External id": 2936870,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015491047.004, "dur": 53.963, + "args": { + "External id": 2936871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015491131.284, "dur": 45.271, + "args": { + "External id": 2936872,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015491189.402, "dur": 46.510, + "args": { + "External id": 2936873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015491244.113, "dur": 35.582, + "args": { + "External id": 2936874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015491301.833, "dur": 25.050, + "args": { + "External id": 2936875,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015491332.247, "dur": 34.420, + "args": { + "External id": 2936876,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015491385.809, "dur": 18.713, + "args": { + "External id": 2936877,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 1336755, "tid": 1336755, + "ts": 1555015491545.935, "dur": 73.101, + "args": { + "External id": 2936878,"Record function id": 0, "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015491689.949, "dur": 43.148, + "args": { + "External id": 2936879,"Record function id": 0, "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 1336755, "tid": 1336755, + "ts": 1555015491742.135, "dur": 18433.992, + "args": { + "External id": 2936880,"Record function id": 0, "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 1336755, "tid": 1336755, + "ts": 1555015491749.833, "dur": 935.762, + "args": { + "External id": 2936881,"Record function id": 0, "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015491829.266, "dur": 7.531, + "args": { + "External id": 2936882,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015491848.870, "dur": 35.095, + "args": { + "External id": 2936883,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491853.995, "dur": 2.132, + "args": { + "External id": 2936884,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491860.027, "dur": 0.335, + "args": { + "External id": 2936885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491861.792, "dur": 0.380, + "args": { + "External id": 2936886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491863.917, "dur": 0.309, + "args": { + "External id": 2936887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491867.234, "dur": 0.273, + "args": { + "External id": 2936888,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491868.846, "dur": 0.341, + "args": { + "External id": 2936889,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491870.714, "dur": 3.312, + "args": { + "External id": 2936890,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491875.783, "dur": 0.466, + "args": { + "External id": 2936891,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015491877.622, "dur": 0.329, + "args": { + "External id": 2936892,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015491897.304, "dur": 39.220, + "args": { + "External id": 2936893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015491968.094, "dur": 158.866, + "args": { + "External id": 2936894,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015491980.281, "dur": 45.470, + "args": { + "External id": 2936895,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015492033.173, "dur": 11.137, + "args": { + "External id": 2936896,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015492037.359, "dur": 6.554, + "args": { + "External id": 2936897,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492041.547, "dur": 0.683, + "args": { + "External id": 2936898,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015492052.376, "dur": 26.818, + "args": { + "External id": 2936899,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492054.757, "dur": 0.490, + "args": { + "External id": 2936900,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492056.620, "dur": 1.971, + "args": { + "External id": 2936901,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492059.802, "dur": 0.410, + "args": { + "External id": 2936902,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492061.350, "dur": 1.576, + "args": { + "External id": 2936903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492066.041, "dur": 0.233, + "args": { + "External id": 2936904,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492067.566, "dur": 0.288, + "args": { + "External id": 2936905,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492069.088, "dur": 0.415, + "args": { + "External id": 2936906,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492072.543, "dur": 0.408, + "args": { + "External id": 2936907,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492074.289, "dur": 0.285, + "args": { + "External id": 2936908,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015492091.727, "dur": 26.998, + "args": { + "External id": 2936909,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015492196.593, "dur": 399.135, + "args": { + "External id": 2936910,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015492227.527, "dur": 363.523, + "args": { + "External id": 2936911,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6886, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015492238.314, "dur": 347.146, + "args": { + "External id": 2936912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015492616.629, "dur": 2.290, + "args": { + "External id": 2936913,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6888, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 1336755, "tid": 1336755, + "ts": 1555015492705.976, "dur": 17224.256, + "args": { + "External id": 2936914,"Record function id": 0, "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492803.671, "dur": 6.018, + "args": { + "External id": 2936915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492813.144, "dur": 1.101, + "args": { + "External id": 2936916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492816.036, "dur": 2.081, + "args": { + "External id": 2936917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492819.668, "dur": 0.829, + "args": { + "External id": 2936918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492821.675, "dur": 0.766, + "args": { + "External id": 2936919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492823.784, "dur": 0.947, + "args": { + "External id": 2936920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492827.609, "dur": 1.044, + "args": { + "External id": 2936921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492830.229, "dur": 2.004, + "args": { + "External id": 2936922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492833.714, "dur": 0.896, + "args": { + "External id": 2936923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015492836.171, "dur": 0.929, + "args": { + "External id": 2936924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015492856.273, "dur": 17037.676, + "args": { + "External id": 2936925,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015492871.680, "dur": 17015.960, + "args": { + "External id": 2936926,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015492894.798, "dur": 12.932, + "args": { + "External id": 2936927,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015492910.782, "dur": 16945.264, + "args": { + "External id": 2936928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015492913.296, "dur": 16942.074, + "args": { + "External id": 2936929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015492918.575, "dur": 6.295, + "args": { + "External id": 2936930,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015492926.557, "dur": 16926.014, + "args": { + "External id": 2936931,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015510101.962, "dur": 35.466, + "args": { + "External id": 2936932,"Sequence number": 29294583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6907 + } + }, + { + "ph": "s", "id": 205, "pid": 1336755, "tid": 1336755, "ts": 1555015510101.962, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015510125.195, "dur": 7.485, + "args": { + "External id": 2936933,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015510128.080, "dur": 4.229, + "args": { + "External id": 2936934,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015510214.508, "dur": 76.476, + "args": { + "External id": 2936935,"Record function id": 0, "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015510292.924, "dur": 1051.654, + "args": { + "External id": 2936936,"Record function id": 0, "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015510334.915, "dur": 996.920, + "args": { + "External id": 2936937,"Sequence number": 29294584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6912 + } + }, + { + "ph": "s", "id": 204, "pid": 1336755, "tid": 1336755, "ts": 1555015510334.915, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015510398.339, "dur": 43.096, + "args": { + "External id": 2936938,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015510454.471, "dur": 103.453, + "args": { + "External id": 2936939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015510567.668, "dur": 38.618, + "args": { + "External id": 2936940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015510613.803, "dur": 31.421, + "args": { + "External id": 2936941,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015510669.854, "dur": 24.563, + "args": { + "External id": 2936942,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015510710.457, "dur": 15.627, + "args": { + "External id": 2936943,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015510743.181, "dur": 121.788, + "args": { + "External id": 2936944,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015510787.358, "dur": 10.740, + "args": { + "External id": 2936945,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015510792.114, "dur": 4.885, + "args": { + "External id": 2936946,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015510800.894, "dur": 5.179, + "args": { + "External id": 2936947,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015510807.575, "dur": 1.251, + "args": { + "External id": 2936948,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015510811.429, "dur": 4.921, + "args": { + "External id": 2936949,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015510875.096, "dur": 45.140, + "args": { + "External id": 2936950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015510945.947, "dur": 28.618, + "args": { + "External id": 2936951,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015511024.022, "dur": 47.078, + "args": { + "External id": 2936952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015511080.817, "dur": 36.691, + "args": { + "External id": 2936953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015511139.819, "dur": 41.706, + "args": { + "External id": 2936954,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015511189.081, "dur": 39.264, + "args": { + "External id": 2936955,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015511248.438, "dur": 17.376, + "args": { + "External id": 2936956,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 1336755, "tid": 1336755, + "ts": 1555015511407.238, "dur": 74.628, + "args": { + "External id": 2936957,"Record function id": 0, "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015511552.629, "dur": 47.304, + "args": { + "External id": 2936958,"Record function id": 0, "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 1336755, "tid": 1336755, + "ts": 1555015511609.330, "dur": 18618.049, + "args": { + "External id": 2936959,"Record function id": 0, "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 1336755, "tid": 1336755, + "ts": 1555015511616.759, "dur": 873.887, + "args": { + "External id": 2936960,"Record function id": 0, "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015511693.548, "dur": 7.413, + "args": { + "External id": 2936961,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015511714.523, "dur": 34.124, + "args": { + "External id": 2936962,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511719.847, "dur": 2.213, + "args": { + "External id": 2936963,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511726.037, "dur": 0.578, + "args": { + "External id": 2936964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511727.714, "dur": 0.522, + "args": { + "External id": 2936965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511729.412, "dur": 2.123, + "args": { + "External id": 2936966,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511732.922, "dur": 0.462, + "args": { + "External id": 2936967,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511734.651, "dur": 0.441, + "args": { + "External id": 2936968,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511738.314, "dur": 1.407, + "args": { + "External id": 2936969,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511740.898, "dur": 0.321, + "args": { + "External id": 2936970,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511742.525, "dur": 0.195, + "args": { + "External id": 2936971,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015511759.416, "dur": 38.185, + "args": { + "External id": 2936972,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015511827.820, "dur": 106.657, + "args": { + "External id": 2936973,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015511840.313, "dur": 3.392, + "args": { + "External id": 2936974,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015511848.958, "dur": 12.453, + "args": { + "External id": 2936975,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015511853.186, "dur": 7.786, + "args": { + "External id": 2936976,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511857.118, "dur": 2.600, + "args": { + "External id": 2936977,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015511867.986, "dur": 26.124, + "args": { + "External id": 2936978,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511870.306, "dur": 0.351, + "args": { + "External id": 2936979,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511872.326, "dur": 0.411, + "args": { + "External id": 2936980,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511873.916, "dur": 0.340, + "args": { + "External id": 2936981,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511876.969, "dur": 1.468, + "args": { + "External id": 2936982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511879.770, "dur": 0.337, + "args": { + "External id": 2936983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511881.473, "dur": 2.077, + "args": { + "External id": 2936984,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511884.610, "dur": 0.419, + "args": { + "External id": 2936985,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511886.253, "dur": 0.317, + "args": { + "External id": 2936986,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015511889.488, "dur": 0.216, + "args": { + "External id": 2936987,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015511904.361, "dur": 22.757, + "args": { + "External id": 2936988,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015512025.183, "dur": 372.695, + "args": { + "External id": 2936989,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015512055.977, "dur": 336.869, + "args": { + "External id": 2936990,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6965, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015512067.140, "dur": 319.730, + "args": { + "External id": 2936991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015512421.735, "dur": 2.274, + "args": { + "External id": 2936992,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6967, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 1336755, "tid": 1336755, + "ts": 1555015512510.427, "dur": 17513.114, + "args": { + "External id": 2936993,"Record function id": 0, "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512605.274, "dur": 6.116, + "args": { + "External id": 2936994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512614.704, "dur": 1.356, + "args": { + "External id": 2936995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512617.785, "dur": 2.103, + "args": { + "External id": 2936996,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512621.384, "dur": 1.090, + "args": { + "External id": 2936997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512623.653, "dur": 0.883, + "args": { + "External id": 2936998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512625.800, "dur": 0.838, + "args": { + "External id": 2936999,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512629.902, "dur": 0.720, + "args": { + "External id": 2937000,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512632.261, "dur": 1.572, + "args": { + "External id": 2937001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512635.578, "dur": 0.717, + "args": { + "External id": 2937002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015512637.858, "dur": 0.870, + "args": { + "External id": 2937003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015512658.099, "dur": 17305.580, + "args": { + "External id": 2937004,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015512673.099, "dur": 17283.871, + "args": { + "External id": 2937005,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015512695.587, "dur": 12.839, + "args": { + "External id": 2937006,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015512711.258, "dur": 17212.813, + "args": { + "External id": 2937007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015512713.954, "dur": 17209.631, + "args": { + "External id": 2937008,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015512719.315, "dur": 5.663, + "args": { + "External id": 2937009,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015512726.754, "dur": 17194.096, + "args": { + "External id": 2937010,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015530166.969, "dur": 35.307, + "args": { + "External id": 2937011,"Sequence number": 29294585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6986 + } + }, + { + "ph": "s", "id": 203, "pid": 1336755, "tid": 1336755, "ts": 1555015530166.969, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015530190.010, "dur": 7.737, + "args": { + "External id": 2937012,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015530193.201, "dur": 4.242, + "args": { + "External id": 2937013,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015530263.963, "dur": 82.200, + "args": { + "External id": 2937014,"Record function id": 0, "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015530347.947, "dur": 1064.166, + "args": { + "External id": 2937015,"Record function id": 0, "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015530387.447, "dur": 1012.510, + "args": { + "External id": 2937016,"Sequence number": 29294586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6991 + } + }, + { + "ph": "s", "id": 202, "pid": 1336755, "tid": 1336755, "ts": 1555015530387.447, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015530449.345, "dur": 43.293, + "args": { + "External id": 2937017,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015530505.149, "dur": 104.912, + "args": { + "External id": 2937018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015530619.735, "dur": 40.615, + "args": { + "External id": 2937019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015530668.148, "dur": 32.368, + "args": { + "External id": 2937020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015530724.124, "dur": 25.299, + "args": { + "External id": 2937021,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015530765.273, "dur": 15.948, + "args": { + "External id": 2937022,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015530798.757, "dur": 120.753, + "args": { + "External id": 2937023,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015530846.045, "dur": 10.253, + "args": { + "External id": 2937024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015530850.901, "dur": 4.552, + "args": { + "External id": 2937025,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015530859.058, "dur": 5.189, + "args": { + "External id": 2937026,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015530865.802, "dur": 1.092, + "args": { + "External id": 2937027,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015530869.367, "dur": 3.893, + "args": { + "External id": 2937028,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015530929.901, "dur": 45.795, + "args": { + "External id": 2937029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015531048.666, "dur": 30.376, + "args": { + "External id": 2937030,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015531089.354, "dur": 46.249, + "args": { + "External id": 2937031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015531157.709, "dur": 40.832, + "args": { + "External id": 2937032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015531226.767, "dur": 30.065, + "args": { + "External id": 2937033,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015531264.044, "dur": 37.024, + "args": { + "External id": 2937034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015531319.298, "dur": 18.585, + "args": { + "External id": 2937035,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 1336755, "tid": 1336755, + "ts": 1555015531474.428, "dur": 72.413, + "args": { + "External id": 2937036,"Record function id": 0, "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015531617.155, "dur": 46.219, + "args": { + "External id": 2937037,"Record function id": 0, "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 1336755, "tid": 1336755, + "ts": 1555015531672.629, "dur": 18641.136, + "args": { + "External id": 2937038,"Record function id": 0, "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 1336755, "tid": 1336755, + "ts": 1555015531680.373, "dur": 885.363, + "args": { + "External id": 2937039,"Record function id": 0, "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015531757.812, "dur": 7.391, + "args": { + "External id": 2937040,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015531777.597, "dur": 33.820, + "args": { + "External id": 2937041,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531782.519, "dur": 2.360, + "args": { + "External id": 2937042,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531789.034, "dur": 0.412, + "args": { + "External id": 2937043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531791.125, "dur": 0.741, + "args": { + "External id": 2937044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531793.074, "dur": 0.492, + "args": { + "External id": 2937045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531796.383, "dur": 0.482, + "args": { + "External id": 2937046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531798.046, "dur": 0.273, + "args": { + "External id": 2937047,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531799.839, "dur": 2.021, + "args": { + "External id": 2937048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531803.737, "dur": 0.352, + "args": { + "External id": 2937049,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531805.325, "dur": 0.294, + "args": { + "External id": 2937050,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015531822.527, "dur": 37.429, + "args": { + "External id": 2937051,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015531891.987, "dur": 155.826, + "args": { + "External id": 2937052,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015531904.306, "dur": 3.881, + "args": { + "External id": 2937053,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015531913.075, "dur": 10.301, + "args": { + "External id": 2937054,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015531917.407, "dur": 5.572, + "args": { + "External id": 2937055,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531921.024, "dur": 0.629, + "args": { + "External id": 2937056,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015531930.116, "dur": 28.340, + "args": { + "External id": 2937057,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531932.354, "dur": 2.450, + "args": { + "External id": 2937058,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531936.391, "dur": 0.463, + "args": { + "External id": 2937059,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531938.237, "dur": 0.398, + "args": { + "External id": 2937060,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531941.772, "dur": 1.358, + "args": { + "External id": 2937061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531944.431, "dur": 0.287, + "args": { + "External id": 2937062,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531946.031, "dur": 0.264, + "args": { + "External id": 2937063,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531949.238, "dur": 0.433, + "args": { + "External id": 2937064,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531951.043, "dur": 0.270, + "args": { + "External id": 2937065,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015531952.716, "dur": 1.735, + "args": { + "External id": 2937066,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015531970.169, "dur": 67.626, + "args": { + "External id": 2937067,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015532102.082, "dur": 372.733, + "args": { + "External id": 2937068,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015532132.850, "dur": 337.035, + "args": { + "External id": 2937069,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7044, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015532157.649, "dur": 307.012, + "args": { + "External id": 2937070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015532498.222, "dur": 2.340, + "args": { + "External id": 2937071,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7046, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 1336755, "tid": 1336755, + "ts": 1555015532586.009, "dur": 17518.491, + "args": { + "External id": 2937072,"Record function id": 0, "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532680.040, "dur": 6.005, + "args": { + "External id": 2937073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532689.164, "dur": 1.548, + "args": { + "External id": 2937074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532692.529, "dur": 2.744, + "args": { + "External id": 2937075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532696.983, "dur": 1.094, + "args": { + "External id": 2937076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532699.550, "dur": 0.828, + "args": { + "External id": 2937077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532701.759, "dur": 1.023, + "args": { + "External id": 2937078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532705.573, "dur": 0.952, + "args": { + "External id": 2937079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532708.308, "dur": 1.805, + "args": { + "External id": 2937080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532711.481, "dur": 1.196, + "args": { + "External id": 2937081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015532714.275, "dur": 1.012, + "args": { + "External id": 2937082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015532734.838, "dur": 17332.463, + "args": { + "External id": 2937083,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015532750.189, "dur": 17310.724, + "args": { + "External id": 2937084,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015532772.928, "dur": 12.900, + "args": { + "External id": 2937085,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015532788.680, "dur": 17239.575, + "args": { + "External id": 2937086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015532791.412, "dur": 17236.303, + "args": { + "External id": 2937087,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015532796.784, "dur": 4.578, + "args": { + "External id": 2937088,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015532803.114, "dur": 17221.961, + "args": { + "External id": 2937089,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015550252.987, "dur": 36.210, + "args": { + "External id": 2937090,"Sequence number": 29294587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7065 + } + }, + { + "ph": "s", "id": 201, "pid": 1336755, "tid": 1336755, "ts": 1555015550252.987, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015550277.163, "dur": 7.527, + "args": { + "External id": 2937091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015550280.090, "dur": 4.249, + "args": { + "External id": 2937092,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015550350.143, "dur": 81.489, + "args": { + "External id": 2937093,"Record function id": 0, "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015550433.160, "dur": 1069.125, + "args": { + "External id": 2937094,"Record function id": 0, "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015550472.849, "dur": 1016.183, + "args": { + "External id": 2937095,"Sequence number": 29294588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7070 + } + }, + { + "ph": "s", "id": 200, "pid": 1336755, "tid": 1336755, "ts": 1555015550472.849, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015550536.189, "dur": 39.596, + "args": { + "External id": 2937096,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015550587.039, "dur": 103.748, + "args": { + "External id": 2937097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015550700.844, "dur": 38.148, + "args": { + "External id": 2937098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015550746.551, "dur": 32.368, + "args": { + "External id": 2937099,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015550801.950, "dur": 24.440, + "args": { + "External id": 2937100,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015550845.944, "dur": 16.032, + "args": { + "External id": 2937101,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015550879.470, "dur": 168.711, + "args": { + "External id": 2937102,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015550926.356, "dur": 10.727, + "args": { + "External id": 2937103,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015550931.368, "dur": 4.899, + "args": { + "External id": 2937104,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015550939.915, "dur": 4.852, + "args": { + "External id": 2937105,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015550946.223, "dur": 1.117, + "args": { + "External id": 2937106,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015550949.816, "dur": 3.456, + "args": { + "External id": 2937107,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015551060.840, "dur": 57.230, + "args": { + "External id": 2937108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015551163.403, "dur": 31.983, + "args": { + "External id": 2937109,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015551206.923, "dur": 45.619, + "args": { + "External id": 2937110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015551261.178, "dur": 36.122, + "args": { + "External id": 2937111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015551320.449, "dur": 25.211, + "args": { + "External id": 2937112,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015551350.903, "dur": 34.925, + "args": { + "External id": 2937113,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015551403.862, "dur": 17.612, + "args": { + "External id": 2937114,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 1336755, "tid": 1336755, + "ts": 1555015551567.424, "dur": 74.428, + "args": { + "External id": 2937115,"Record function id": 0, "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015551715.033, "dur": 45.941, + "args": { + "External id": 2937116,"Record function id": 0, "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 1336755, "tid": 1336755, + "ts": 1555015551770.421, "dur": 18443.715, + "args": { + "External id": 2937117,"Record function id": 0, "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 1336755, "tid": 1336755, + "ts": 1555015551778.155, "dur": 887.977, + "args": { + "External id": 2937118,"Record function id": 0, "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015551856.840, "dur": 7.568, + "args": { + "External id": 2937119,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015551876.262, "dur": 41.229, + "args": { + "External id": 2937120,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551884.980, "dur": 2.009, + "args": { + "External id": 2937121,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551892.114, "dur": 0.248, + "args": { + "External id": 2937122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551893.544, "dur": 0.362, + "args": { + "External id": 2937123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551895.155, "dur": 2.056, + "args": { + "External id": 2937124,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551898.702, "dur": 0.415, + "args": { + "External id": 2937125,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551902.262, "dur": 0.369, + "args": { + "External id": 2937126,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551905.516, "dur": 1.103, + "args": { + "External id": 2937127,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551907.880, "dur": 0.284, + "args": { + "External id": 2937128,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015551909.492, "dur": 1.862, + "args": { + "External id": 2937129,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015551927.942, "dur": 37.585, + "args": { + "External id": 2937130,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015552038.723, "dur": 131.724, + "args": { + "External id": 2937131,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015552051.117, "dur": 5.506, + "args": { + "External id": 2937132,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015552062.185, "dur": 10.868, + "args": { + "External id": 2937133,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015552066.216, "dur": 6.438, + "args": { + "External id": 2937134,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552069.974, "dur": 1.010, + "args": { + "External id": 2937135,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015552080.482, "dur": 26.882, + "args": { + "External id": 2937136,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552082.677, "dur": 0.388, + "args": { + "External id": 2937137,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552084.792, "dur": 0.644, + "args": { + "External id": 2937138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552086.692, "dur": 2.469, + "args": { + "External id": 2937139,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552090.564, "dur": 1.265, + "args": { + "External id": 2937140,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552093.083, "dur": 0.247, + "args": { + "External id": 2937141,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552096.441, "dur": 0.329, + "args": { + "External id": 2937142,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552098.012, "dur": 0.386, + "args": { + "External id": 2937143,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552099.616, "dur": 0.369, + "args": { + "External id": 2937144,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552102.605, "dur": 0.327, + "args": { + "External id": 2937145,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015552118.164, "dur": 42.384, + "args": { + "External id": 2937146,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015552225.645, "dur": 354.895, + "args": { + "External id": 2937147,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015552256.895, "dur": 319.321, + "args": { + "External id": 2937148,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7123, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015552267.370, "dur": 303.325, + "args": { + "External id": 2937149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015552601.055, "dur": 2.481, + "args": { + "External id": 2937150,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7125, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 1336755, "tid": 1336755, + "ts": 1555015552685.245, "dur": 17290.420, + "args": { + "External id": 2937151,"Record function id": 0, "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552776.990, "dur": 5.962, + "args": { + "External id": 2937152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552786.191, "dur": 1.156, + "args": { + "External id": 2937153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552788.930, "dur": 2.296, + "args": { + "External id": 2937154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552792.982, "dur": 1.176, + "args": { + "External id": 2937155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552795.411, "dur": 0.846, + "args": { + "External id": 2937156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552797.548, "dur": 0.741, + "args": { + "External id": 2937157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552801.342, "dur": 0.887, + "args": { + "External id": 2937158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552803.924, "dur": 1.868, + "args": { + "External id": 2937159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552807.686, "dur": 0.658, + "args": { + "External id": 2937160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015552809.876, "dur": 0.822, + "args": { + "External id": 2937161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015552830.185, "dur": 17107.155, + "args": { + "External id": 2937162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015552845.284, "dur": 17085.343, + "args": { + "External id": 2937163,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015552866.861, "dur": 13.334, + "args": { + "External id": 2937164,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015552883.254, "dur": 17015.680, + "args": { + "External id": 2937165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015552886.079, "dur": 17012.374, + "args": { + "External id": 2937166,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015552891.661, "dur": 5.705, + "args": { + "External id": 2937167,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015552899.193, "dur": 16996.378, + "args": { + "External id": 2937168,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015570139.959, "dur": 47.030, + "args": { + "External id": 2937169,"Sequence number": 29294589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7144 + } + }, + { + "ph": "s", "id": 199, "pid": 1336755, "tid": 1336755, "ts": 1555015570139.959, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015570173.892, "dur": 8.133, + "args": { + "External id": 2937170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015570177.114, "dur": 4.583, + "args": { + "External id": 2937171,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015570251.603, "dur": 76.342, + "args": { + "External id": 2937172,"Record function id": 0, "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015570329.581, "dur": 1065.448, + "args": { + "External id": 2937173,"Record function id": 0, "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015570371.122, "dur": 1011.256, + "args": { + "External id": 2937174,"Sequence number": 29294590, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7149 + } + }, + { + "ph": "s", "id": 198, "pid": 1336755, "tid": 1336755, "ts": 1555015570371.122, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015570433.550, "dur": 42.247, + "args": { + "External id": 2937175,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015570487.015, "dur": 105.225, + "args": { + "External id": 2937176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015570601.827, "dur": 39.753, + "args": { + "External id": 2937177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015570650.144, "dur": 30.716, + "args": { + "External id": 2937178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015570703.847, "dur": 24.737, + "args": { + "External id": 2937179,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015570745.016, "dur": 14.358, + "args": { + "External id": 2937180,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015570776.215, "dur": 129.214, + "args": { + "External id": 2937181,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015570830.385, "dur": 10.999, + "args": { + "External id": 2937182,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015570835.311, "dur": 5.381, + "args": { + "External id": 2937183,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015570843.977, "dur": 5.391, + "args": { + "External id": 2937184,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015570850.885, "dur": 1.314, + "args": { + "External id": 2937185,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015570854.958, "dur": 3.774, + "args": { + "External id": 2937186,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015570915.164, "dur": 44.636, + "args": { + "External id": 2937187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015571029.401, "dur": 31.710, + "args": { + "External id": 2937188,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015571073.448, "dur": 46.424, + "args": { + "External id": 2937189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015571127.932, "dur": 51.000, + "args": { + "External id": 2937190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015571203.923, "dur": 26.527, + "args": { + "External id": 2937191,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015571236.849, "dur": 41.723, + "args": { + "External id": 2937192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015571299.175, "dur": 18.012, + "args": { + "External id": 2937193,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 1336755, "tid": 1336755, + "ts": 1555015571456.053, "dur": 70.977, + "args": { + "External id": 2937194,"Record function id": 0, "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015571598.213, "dur": 45.556, + "args": { + "External id": 2937195,"Record function id": 0, "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 1336755, "tid": 1336755, + "ts": 1555015571653.815, "dur": 18627.472, + "args": { + "External id": 2937196,"Record function id": 0, "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 1336755, "tid": 1336755, + "ts": 1555015571662.094, "dur": 886.249, + "args": { + "External id": 2937197,"Record function id": 0, "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015571738.143, "dur": 7.724, + "args": { + "External id": 2937198,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015571757.416, "dur": 33.819, + "args": { + "External id": 2937199,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571762.462, "dur": 2.148, + "args": { + "External id": 2937200,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571768.680, "dur": 0.521, + "args": { + "External id": 2937201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571770.669, "dur": 0.360, + "args": { + "External id": 2937202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571772.590, "dur": 0.538, + "args": { + "External id": 2937203,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571776.203, "dur": 0.549, + "args": { + "External id": 2937204,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571778.189, "dur": 0.142, + "args": { + "External id": 2937205,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571779.694, "dur": 2.221, + "args": { + "External id": 2937206,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571783.802, "dur": 0.277, + "args": { + "External id": 2937207,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571785.498, "dur": 0.190, + "args": { + "External id": 2937208,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015571801.620, "dur": 36.402, + "args": { + "External id": 2937209,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015571869.048, "dur": 156.002, + "args": { + "External id": 2937210,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015571879.711, "dur": 4.118, + "args": { + "External id": 2937211,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015571889.110, "dur": 10.292, + "args": { + "External id": 2937212,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015571893.303, "dur": 5.703, + "args": { + "External id": 2937213,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571897.122, "dur": 0.795, + "args": { + "External id": 2937214,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015571906.725, "dur": 27.784, + "args": { + "External id": 2937215,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571909.040, "dur": 2.097, + "args": { + "External id": 2937216,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571912.906, "dur": 0.462, + "args": { + "External id": 2937217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571914.942, "dur": 0.415, + "args": { + "External id": 2937218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571918.127, "dur": 1.296, + "args": { + "External id": 2937219,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571920.648, "dur": 0.619, + "args": { + "External id": 2937220,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571922.590, "dur": 0.366, + "args": { + "External id": 2937221,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571925.684, "dur": 0.179, + "args": { + "External id": 2937222,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571927.049, "dur": 0.164, + "args": { + "External id": 2937223,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015571928.446, "dur": 1.922, + "args": { + "External id": 2937224,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015571954.123, "dur": 22.799, + "args": { + "External id": 2937225,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015572081.442, "dur": 375.333, + "args": { + "External id": 2937226,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015572112.723, "dur": 338.888, + "args": { + "External id": 2937227,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7202, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015572124.135, "dur": 321.940, + "args": { + "External id": 2937228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015572479.802, "dur": 2.313, + "args": { + "External id": 2937229,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7204, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 1336755, "tid": 1336755, + "ts": 1555015572569.121, "dur": 17504.872, + "args": { + "External id": 2937230,"Record function id": 0, "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572666.820, "dur": 6.189, + "args": { + "External id": 2937231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572676.179, "dur": 1.218, + "args": { + "External id": 2937232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572679.264, "dur": 1.915, + "args": { + "External id": 2937233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572682.872, "dur": 1.075, + "args": { + "External id": 2937234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572685.255, "dur": 1.182, + "args": { + "External id": 2937235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572687.696, "dur": 0.802, + "args": { + "External id": 2937236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572691.817, "dur": 0.967, + "args": { + "External id": 2937237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572694.246, "dur": 1.706, + "args": { + "External id": 2937238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572697.245, "dur": 0.764, + "args": { + "External id": 2937239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015572699.507, "dur": 0.747, + "args": { + "External id": 2937240,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015572719.600, "dur": 17317.115, + "args": { + "External id": 2937241,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015572735.252, "dur": 17294.403, + "args": { + "External id": 2937242,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015572757.131, "dur": 13.493, + "args": { + "External id": 2937243,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015572773.494, "dur": 17202.130, + "args": { + "External id": 2937244,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015572776.101, "dur": 17198.975, + "args": { + "External id": 2937245,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015572781.403, "dur": 5.205, + "args": { + "External id": 2937246,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015572788.300, "dur": 17184.060, + "args": { + "External id": 2937247,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015590220.222, "dur": 35.238, + "args": { + "External id": 2937248,"Sequence number": 29294591, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7223 + } + }, + { + "ph": "s", "id": 197, "pid": 1336755, "tid": 1336755, "ts": 1555015590220.222, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015590243.346, "dur": 7.727, + "args": { + "External id": 2937249,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015590246.710, "dur": 3.970, + "args": { + "External id": 2937250,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015590319.299, "dur": 79.561, + "args": { + "External id": 2937251,"Record function id": 0, "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015590400.329, "dur": 1056.576, + "args": { + "External id": 2937252,"Record function id": 0, "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015590445.028, "dur": 998.706, + "args": { + "External id": 2937253,"Sequence number": 29294592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7228 + } + }, + { + "ph": "s", "id": 196, "pid": 1336755, "tid": 1336755, "ts": 1555015590445.028, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015590506.132, "dur": 40.707, + "args": { + "External id": 2937254,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015590559.033, "dur": 106.862, + "args": { + "External id": 2937255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015590675.743, "dur": 39.546, + "args": { + "External id": 2937256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015590722.902, "dur": 33.383, + "args": { + "External id": 2937257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015590778.939, "dur": 23.808, + "args": { + "External id": 2937258,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015590819.307, "dur": 13.714, + "args": { + "External id": 2937259,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015590849.376, "dur": 119.296, + "args": { + "External id": 2937260,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015590894.459, "dur": 11.033, + "args": { + "External id": 2937261,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015590899.227, "dur": 5.379, + "args": { + "External id": 2937262,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015590908.152, "dur": 5.512, + "args": { + "External id": 2937263,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015590914.958, "dur": 1.350, + "args": { + "External id": 2937264,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015590918.608, "dur": 3.693, + "args": { + "External id": 2937265,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015590978.656, "dur": 88.391, + "args": { + "External id": 2937266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015591100.281, "dur": 29.004, + "args": { + "External id": 2937267,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015591139.244, "dur": 60.884, + "args": { + "External id": 2937268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015591211.457, "dur": 36.556, + "args": { + "External id": 2937269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015591271.221, "dur": 26.640, + "args": { + "External id": 2937270,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015591303.692, "dur": 34.644, + "args": { + "External id": 2937271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015591357.507, "dur": 17.446, + "args": { + "External id": 2937272,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 1336755, "tid": 1336755, + "ts": 1555015591520.674, "dur": 73.250, + "args": { + "External id": 2937273,"Record function id": 0, "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015591665.330, "dur": 44.827, + "args": { + "External id": 2937274,"Record function id": 0, "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 1336755, "tid": 1336755, + "ts": 1555015591719.669, "dur": 18563.106, + "args": { + "External id": 2937275,"Record function id": 0, "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 1336755, "tid": 1336755, + "ts": 1555015591728.721, "dur": 973.801, + "args": { + "External id": 2937276,"Record function id": 0, "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015591808.374, "dur": 7.491, + "args": { + "External id": 2937277,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015591827.972, "dur": 33.256, + "args": { + "External id": 2937278,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591832.838, "dur": 2.110, + "args": { + "External id": 2937279,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591839.256, "dur": 0.235, + "args": { + "External id": 2937280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591840.903, "dur": 0.381, + "args": { + "External id": 2937281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591842.519, "dur": 0.427, + "args": { + "External id": 2937282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591845.643, "dur": 0.405, + "args": { + "External id": 2937283,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591847.208, "dur": 0.610, + "args": { + "External id": 2937284,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591849.399, "dur": 3.158, + "args": { + "External id": 2937285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591853.794, "dur": 0.535, + "args": { + "External id": 2937286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591855.479, "dur": 0.147, + "args": { + "External id": 2937287,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015591874.111, "dur": 36.938, + "args": { + "External id": 2937288,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015591942.429, "dur": 155.244, + "args": { + "External id": 2937289,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015591953.051, "dur": 5.349, + "args": { + "External id": 2937290,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015591963.749, "dur": 10.581, + "args": { + "External id": 2937291,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015591967.949, "dur": 5.745, + "args": { + "External id": 2937292,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015591971.873, "dur": 0.509, + "args": { + "External id": 2937293,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015591980.884, "dur": 68.962, + "args": { + "External id": 2937294,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592023.839, "dur": 0.588, + "args": { + "External id": 2937295,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592026.485, "dur": 2.134, + "args": { + "External id": 2937296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592030.231, "dur": 0.330, + "args": { + "External id": 2937297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592032.347, "dur": 1.472, + "args": { + "External id": 2937298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592037.158, "dur": 0.250, + "args": { + "External id": 2937299,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592038.558, "dur": 0.297, + "args": { + "External id": 2937300,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592040.299, "dur": 0.281, + "args": { + "External id": 2937301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592043.384, "dur": 0.282, + "args": { + "External id": 2937302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592044.887, "dur": 0.344, + "args": { + "External id": 2937303,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015592062.475, "dur": 26.460, + "args": { + "External id": 2937304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015592163.134, "dur": 443.429, + "args": { + "External id": 2937305,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015592193.589, "dur": 407.242, + "args": { + "External id": 2937306,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7281, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015592204.735, "dur": 388.579, + "args": { + "External id": 2937307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015592631.435, "dur": 2.538, + "args": { + "External id": 2937308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7283, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 1336755, "tid": 1336755, + "ts": 1555015592723.137, "dur": 17355.024, + "args": { + "External id": 2937309,"Record function id": 0, "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592820.318, "dur": 6.667, + "args": { + "External id": 2937310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592830.314, "dur": 1.088, + "args": { + "External id": 2937311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592832.991, "dur": 2.180, + "args": { + "External id": 2937312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592836.783, "dur": 0.935, + "args": { + "External id": 2937313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592839.269, "dur": 0.908, + "args": { + "External id": 2937314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592841.467, "dur": 0.921, + "args": { + "External id": 2937315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592845.569, "dur": 0.840, + "args": { + "External id": 2937316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592848.063, "dur": 1.537, + "args": { + "External id": 2937317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592851.014, "dur": 0.875, + "args": { + "External id": 2937318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015592853.450, "dur": 0.753, + "args": { + "External id": 2937319,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015592873.853, "dur": 17166.934, + "args": { + "External id": 2937320,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015592889.521, "dur": 17144.565, + "args": { + "External id": 2937321,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015592913.549, "dur": 12.617, + "args": { + "External id": 2937322,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015592929.299, "dur": 17050.629, + "args": { + "External id": 2937323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015592931.679, "dur": 17047.710, + "args": { + "External id": 2937324,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015592937.297, "dur": 4.925, + "args": { + "External id": 2937325,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015592943.973, "dur": 17032.841, + "args": { + "External id": 2937326,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015610222.115, "dur": 35.524, + "args": { + "External id": 2937327,"Sequence number": 29294593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7302 + } + }, + { + "ph": "s", "id": 195, "pid": 1336755, "tid": 1336755, "ts": 1555015610222.115, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015610245.311, "dur": 7.538, + "args": { + "External id": 2937328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015610248.256, "dur": 4.248, + "args": { + "External id": 2937329,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015610320.656, "dur": 78.170, + "args": { + "External id": 2937330,"Record function id": 0, "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015610400.511, "dur": 1114.029, + "args": { + "External id": 2937331,"Record function id": 0, "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015610440.448, "dur": 1060.984, + "args": { + "External id": 2937332,"Sequence number": 29294594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7307 + } + }, + { + "ph": "s", "id": 194, "pid": 1336755, "tid": 1336755, "ts": 1555015610440.448, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015610502.952, "dur": 40.446, + "args": { + "External id": 2937333,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015610556.346, "dur": 106.791, + "args": { + "External id": 2937334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015610672.604, "dur": 40.610, + "args": { + "External id": 2937335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015610721.958, "dur": 32.011, + "args": { + "External id": 2937336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015610783.459, "dur": 25.906, + "args": { + "External id": 2937337,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015610825.503, "dur": 15.227, + "args": { + "External id": 2937338,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015610857.465, "dur": 163.259, + "args": { + "External id": 2937339,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015610904.471, "dur": 11.153, + "args": { + "External id": 2937340,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015610909.589, "dur": 5.253, + "args": { + "External id": 2937341,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015610918.594, "dur": 5.146, + "args": { + "External id": 2937342,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015610925.317, "dur": 1.211, + "args": { + "External id": 2937343,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015610928.838, "dur": 5.199, + "args": { + "External id": 2937344,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015611063.606, "dur": 58.296, + "args": { + "External id": 2937345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015611174.070, "dur": 33.187, + "args": { + "External id": 2937346,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015611218.631, "dur": 45.731, + "args": { + "External id": 2937347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015611273.265, "dur": 35.261, + "args": { + "External id": 2937348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015611330.547, "dur": 24.683, + "args": { + "External id": 2937349,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015611360.260, "dur": 35.688, + "args": { + "External id": 2937350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015611415.141, "dur": 20.212, + "args": { + "External id": 2937351,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 1336755, "tid": 1336755, + "ts": 1555015611578.192, "dur": 79.727, + "args": { + "External id": 2937352,"Record function id": 0, "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015611728.587, "dur": 45.616, + "args": { + "External id": 2937353,"Record function id": 0, "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 1336755, "tid": 1336755, + "ts": 1555015611783.627, "dur": 18543.320, + "args": { + "External id": 2937354,"Record function id": 0, "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 1336755, "tid": 1336755, + "ts": 1555015611792.500, "dur": 883.589, + "args": { + "External id": 2937355,"Record function id": 0, "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015611872.561, "dur": 7.782, + "args": { + "External id": 2937356,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015611892.836, "dur": 34.288, + "args": { + "External id": 2937357,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611898.008, "dur": 2.121, + "args": { + "External id": 2937358,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611904.179, "dur": 0.304, + "args": { + "External id": 2937359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611905.765, "dur": 0.187, + "args": { + "External id": 2937360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611907.513, "dur": 0.537, + "args": { + "External id": 2937361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611910.818, "dur": 0.522, + "args": { + "External id": 2937362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611912.617, "dur": 0.497, + "args": { + "External id": 2937363,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611914.843, "dur": 2.953, + "args": { + "External id": 2937364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611919.256, "dur": 0.486, + "args": { + "External id": 2937365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015611920.852, "dur": 0.360, + "args": { + "External id": 2937366,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015611937.739, "dur": 35.893, + "args": { + "External id": 2937367,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015612045.712, "dur": 130.726, + "args": { + "External id": 2937368,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015612057.915, "dur": 5.563, + "args": { + "External id": 2937369,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015612069.059, "dur": 11.054, + "args": { + "External id": 2937370,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015612073.388, "dur": 6.308, + "args": { + "External id": 2937371,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612077.032, "dur": 0.909, + "args": { + "External id": 2937372,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015612087.023, "dur": 28.116, + "args": { + "External id": 2937373,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612089.122, "dur": 2.261, + "args": { + "External id": 2937374,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612092.699, "dur": 0.507, + "args": { + "External id": 2937375,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612094.615, "dur": 0.498, + "args": { + "External id": 2937376,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612098.322, "dur": 1.387, + "args": { + "External id": 2937377,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612101.055, "dur": 0.514, + "args": { + "External id": 2937378,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612103.031, "dur": 0.172, + "args": { + "External id": 2937379,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612106.020, "dur": 0.173, + "args": { + "External id": 2937380,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612107.396, "dur": 0.195, + "args": { + "External id": 2937381,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612108.908, "dur": 2.267, + "args": { + "External id": 2937382,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015612127.004, "dur": 39.616, + "args": { + "External id": 2937383,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015612232.322, "dur": 356.933, + "args": { + "External id": 2937384,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015612262.236, "dur": 322.776, + "args": { + "External id": 2937385,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7360, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015612272.587, "dur": 306.557, + "args": { + "External id": 2937386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015612610.588, "dur": 2.323, + "args": { + "External id": 2937387,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7362, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 1336755, "tid": 1336755, + "ts": 1555015612695.303, "dur": 17420.985, + "args": { + "External id": 2937388,"Record function id": 0, "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612789.519, "dur": 5.477, + "args": { + "External id": 2937389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612798.160, "dur": 1.176, + "args": { + "External id": 2937390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612800.890, "dur": 2.416, + "args": { + "External id": 2937391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612804.942, "dur": 0.797, + "args": { + "External id": 2937392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612807.279, "dur": 0.976, + "args": { + "External id": 2937393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612809.484, "dur": 1.181, + "args": { + "External id": 2937394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612814.055, "dur": 0.821, + "args": { + "External id": 2937395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612816.256, "dur": 2.188, + "args": { + "External id": 2937396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612819.881, "dur": 0.926, + "args": { + "External id": 2937397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015612822.430, "dur": 0.849, + "args": { + "External id": 2937398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015612842.058, "dur": 17236.122, + "args": { + "External id": 2937399,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015612858.254, "dur": 17213.375, + "args": { + "External id": 2937400,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015612882.110, "dur": 13.608, + "args": { + "External id": 2937401,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015612898.585, "dur": 17141.766, + "args": { + "External id": 2937402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015612901.225, "dur": 17138.502, + "args": { + "External id": 2937403,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015612906.467, "dur": 5.210, + "args": { + "External id": 2937404,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015612913.407, "dur": 17123.420, + "args": { + "External id": 2937405,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015630266.425, "dur": 35.623, + "args": { + "External id": 2937406,"Sequence number": 29294595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7381 + } + }, + { + "ph": "s", "id": 193, "pid": 1336755, "tid": 1336755, "ts": 1555015630266.425, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015630289.343, "dur": 8.100, + "args": { + "External id": 2937407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015630292.523, "dur": 4.525, + "args": { + "External id": 2937408,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015630363.792, "dur": 76.354, + "args": { + "External id": 2937409,"Record function id": 0, "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015630441.826, "dur": 1055.250, + "args": { + "External id": 2937410,"Record function id": 0, "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015630481.773, "dur": 1001.779, + "args": { + "External id": 2937411,"Sequence number": 29294596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7386 + } + }, + { + "ph": "s", "id": 192, "pid": 1336755, "tid": 1336755, "ts": 1555015630481.773, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015630544.880, "dur": 41.118, + "args": { + "External id": 2937412,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015630597.760, "dur": 103.838, + "args": { + "External id": 2937413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015630710.550, "dur": 40.213, + "args": { + "External id": 2937414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015630758.568, "dur": 31.564, + "args": { + "External id": 2937415,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015630814.194, "dur": 23.635, + "args": { + "External id": 2937416,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015630854.361, "dur": 14.931, + "args": { + "External id": 2937417,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015630885.778, "dur": 161.712, + "args": { + "External id": 2937418,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015630931.456, "dur": 10.555, + "args": { + "External id": 2937419,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015630936.225, "dur": 5.085, + "args": { + "External id": 2937420,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015630944.750, "dur": 5.401, + "args": { + "External id": 2937421,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015630951.468, "dur": 1.463, + "args": { + "External id": 2937422,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015630955.391, "dur": 3.370, + "args": { + "External id": 2937423,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015631059.437, "dur": 53.757, + "args": { + "External id": 2937424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015631156.450, "dur": 29.986, + "args": { + "External id": 2937425,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015631197.483, "dur": 45.557, + "args": { + "External id": 2937426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015631251.288, "dur": 36.017, + "args": { + "External id": 2937427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015631311.492, "dur": 27.097, + "args": { + "External id": 2937428,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015631343.870, "dur": 35.505, + "args": { + "External id": 2937429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015631398.207, "dur": 17.371, + "args": { + "External id": 2937430,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 1336755, "tid": 1336755, + "ts": 1555015631560.193, "dur": 70.761, + "args": { + "External id": 2937431,"Record function id": 0, "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015631701.642, "dur": 45.926, + "args": { + "External id": 2937432,"Record function id": 0, "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 1336755, "tid": 1336755, + "ts": 1555015631756.938, "dur": 18747.426, + "args": { + "External id": 2937433,"Record function id": 0, "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 1336755, "tid": 1336755, + "ts": 1555015631764.753, "dur": 908.560, + "args": { + "External id": 2937434,"Record function id": 0, "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015631842.427, "dur": 7.469, + "args": { + "External id": 2937435,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015631861.577, "dur": 35.109, + "args": { + "External id": 2937436,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631866.791, "dur": 2.034, + "args": { + "External id": 2937437,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631873.015, "dur": 0.336, + "args": { + "External id": 2937438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631874.805, "dur": 0.445, + "args": { + "External id": 2937439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631876.743, "dur": 0.553, + "args": { + "External id": 2937440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631880.382, "dur": 0.718, + "args": { + "External id": 2937441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631882.322, "dur": 0.508, + "args": { + "External id": 2937442,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631884.101, "dur": 2.843, + "args": { + "External id": 2937443,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631888.626, "dur": 0.237, + "args": { + "External id": 2937444,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015631890.415, "dur": 0.183, + "args": { + "External id": 2937445,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015631907.158, "dur": 37.669, + "args": { + "External id": 2937446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015631976.112, "dur": 198.139, + "args": { + "External id": 2937447,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015632044.533, "dur": 7.353, + "args": { + "External id": 2937448,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015632058.331, "dur": 11.117, + "args": { + "External id": 2937449,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015632062.693, "dur": 6.351, + "args": { + "External id": 2937450,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632066.514, "dur": 0.705, + "args": { + "External id": 2937451,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015632082.121, "dur": 27.298, + "args": { + "External id": 2937452,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632084.508, "dur": 0.509, + "args": { + "External id": 2937453,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632086.595, "dur": 1.887, + "args": { + "External id": 2937454,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632089.791, "dur": 0.384, + "args": { + "External id": 2937455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632091.630, "dur": 1.680, + "args": { + "External id": 2937456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632096.926, "dur": 0.425, + "args": { + "External id": 2937457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632098.782, "dur": 0.292, + "args": { + "External id": 2937458,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632100.413, "dur": 0.164, + "args": { + "External id": 2937459,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632103.518, "dur": 0.413, + "args": { + "External id": 2937460,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632105.047, "dur": 0.200, + "args": { + "External id": 2937461,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015632121.370, "dur": 42.149, + "args": { + "External id": 2937462,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015632228.147, "dur": 356.142, + "args": { + "External id": 2937463,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015632258.454, "dur": 321.236, + "args": { + "External id": 2937464,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7439, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015632268.743, "dur": 305.960, + "args": { + "External id": 2937465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015632606.383, "dur": 5.020, + "args": { + "External id": 2937466,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7441, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 1336755, "tid": 1336755, + "ts": 1555015632692.168, "dur": 17625.085, + "args": { + "External id": 2937467,"Record function id": 0, "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632786.611, "dur": 6.000, + "args": { + "External id": 2937468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632795.846, "dur": 1.319, + "args": { + "External id": 2937469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632798.801, "dur": 2.115, + "args": { + "External id": 2937470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632802.597, "dur": 0.907, + "args": { + "External id": 2937471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632805.074, "dur": 1.024, + "args": { + "External id": 2937472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632807.391, "dur": 0.705, + "args": { + "External id": 2937473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632811.272, "dur": 0.842, + "args": { + "External id": 2937474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632813.449, "dur": 1.920, + "args": { + "External id": 2937475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632817.007, "dur": 0.753, + "args": { + "External id": 2937476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015632819.273, "dur": 0.886, + "args": { + "External id": 2937477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015632839.080, "dur": 17442.334, + "args": { + "External id": 2937478,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015632854.027, "dur": 17420.718, + "args": { + "External id": 2937479,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015632875.553, "dur": 12.910, + "args": { + "External id": 2937480,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015632891.167, "dur": 17350.510, + "args": { + "External id": 2937481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015632893.809, "dur": 17347.235, + "args": { + "External id": 2937482,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015632899.210, "dur": 5.562, + "args": { + "External id": 2937483,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015632906.411, "dur": 17331.788, + "args": { + "External id": 2937484,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015650445.245, "dur": 35.392, + "args": { + "External id": 2937485,"Sequence number": 29294597, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7460 + } + }, + { + "ph": "s", "id": 191, "pid": 1336755, "tid": 1336755, "ts": 1555015650445.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015650468.668, "dur": 7.178, + "args": { + "External id": 2937486,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015650471.643, "dur": 3.973, + "args": { + "External id": 2937487,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015650540.838, "dur": 76.556, + "args": { + "External id": 2937488,"Record function id": 0, "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015650619.028, "dur": 1058.715, + "args": { + "External id": 2937489,"Record function id": 0, "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015650657.800, "dur": 1007.541, + "args": { + "External id": 2937490,"Sequence number": 29294598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7465 + } + }, + { + "ph": "s", "id": 190, "pid": 1336755, "tid": 1336755, "ts": 1555015650657.800, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015650722.155, "dur": 40.412, + "args": { + "External id": 2937491,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015650775.618, "dur": 104.555, + "args": { + "External id": 2937492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015650890.012, "dur": 39.012, + "args": { + "External id": 2937493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015650935.576, "dur": 31.918, + "args": { + "External id": 2937494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015651033.656, "dur": 27.686, + "args": { + "External id": 2937495,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015651080.736, "dur": 17.124, + "args": { + "External id": 2937496,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015651115.203, "dur": 142.425, + "args": { + "External id": 2937497,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015651176.728, "dur": 12.329, + "args": { + "External id": 2937498,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015651181.644, "dur": 6.220, + "args": { + "External id": 2937499,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015651191.977, "dur": 5.860, + "args": { + "External id": 2937500,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015651199.267, "dur": 3.060, + "args": { + "External id": 2937501,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015651204.662, "dur": 3.538, + "args": { + "External id": 2937502,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015651268.265, "dur": 53.848, + "args": { + "External id": 2937503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015651350.995, "dur": 27.119, + "args": { + "External id": 2937504,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015651387.892, "dur": 42.921, + "args": { + "External id": 2937505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015651437.793, "dur": 35.431, + "args": { + "External id": 2937506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015651495.906, "dur": 26.527, + "args": { + "External id": 2937507,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015651527.934, "dur": 35.177, + "args": { + "External id": 2937508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015651580.281, "dur": 19.470, + "args": { + "External id": 2937509,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 1336755, "tid": 1336755, + "ts": 1555015651739.963, "dur": 74.447, + "args": { + "External id": 2937510,"Record function id": 0, "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015651884.272, "dur": 46.785, + "args": { + "External id": 2937511,"Record function id": 0, "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 1336755, "tid": 1336755, + "ts": 1555015651940.629, "dur": 18760.574, + "args": { + "External id": 2937512,"Record function id": 0, "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 1336755, "tid": 1336755, + "ts": 1555015651950.029, "dur": 891.140, + "args": { + "External id": 2937513,"Record function id": 0, "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015652072.686, "dur": 8.975, + "args": { + "External id": 2937514,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015652094.963, "dur": 33.220, + "args": { + "External id": 2937515,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652100.306, "dur": 2.119, + "args": { + "External id": 2937516,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652106.596, "dur": 0.248, + "args": { + "External id": 2937517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652108.234, "dur": 0.456, + "args": { + "External id": 2937518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652109.956, "dur": 0.526, + "args": { + "External id": 2937519,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652113.286, "dur": 0.444, + "args": { + "External id": 2937520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652115.020, "dur": 0.438, + "args": { + "External id": 2937521,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652116.844, "dur": 1.739, + "args": { + "External id": 2937522,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652119.929, "dur": 0.189, + "args": { + "External id": 2937523,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652121.444, "dur": 0.263, + "args": { + "External id": 2937524,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015652139.189, "dur": 54.882, + "args": { + "External id": 2937525,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015652229.901, "dur": 113.886, + "args": { + "External id": 2937526,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015652240.536, "dur": 4.983, + "args": { + "External id": 2937527,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015652250.997, "dur": 10.374, + "args": { + "External id": 2937528,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015652255.260, "dur": 5.679, + "args": { + "External id": 2937529,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652258.618, "dur": 0.753, + "args": { + "External id": 2937530,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015652269.073, "dur": 29.515, + "args": { + "External id": 2937531,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652271.770, "dur": 2.143, + "args": { + "External id": 2937532,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652275.677, "dur": 0.466, + "args": { + "External id": 2937533,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652277.832, "dur": 0.357, + "args": { + "External id": 2937534,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652281.137, "dur": 1.523, + "args": { + "External id": 2937535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652284.223, "dur": 0.493, + "args": { + "External id": 2937536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652285.897, "dur": 0.477, + "args": { + "External id": 2937537,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652289.228, "dur": 0.357, + "args": { + "External id": 2937538,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652290.976, "dur": 0.302, + "args": { + "External id": 2937539,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015652292.571, "dur": 1.685, + "args": { + "External id": 2937540,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015652311.586, "dur": 23.957, + "args": { + "External id": 2937541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015652397.505, "dur": 358.764, + "args": { + "External id": 2937542,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015652427.954, "dur": 323.843, + "args": { + "External id": 2937543,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7518, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015652438.209, "dur": 308.320, + "args": { + "External id": 2937544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015652778.222, "dur": 2.175, + "args": { + "External id": 2937545,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7520, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 1336755, "tid": 1336755, + "ts": 1555015652860.007, "dur": 17638.426, + "args": { + "External id": 2937546,"Record function id": 0, "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015652952.723, "dur": 5.069, + "args": { + "External id": 2937547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015652972.562, "dur": 1.106, + "args": { + "External id": 2937548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015652975.340, "dur": 2.323, + "args": { + "External id": 2937549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015652980.786, "dur": 1.114, + "args": { + "External id": 2937550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015653024.561, "dur": 1.392, + "args": { + "External id": 2937551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015653027.237, "dur": 1.002, + "args": { + "External id": 2937552,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015653032.055, "dur": 1.015, + "args": { + "External id": 2937553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015653035.982, "dur": 2.061, + "args": { + "External id": 2937554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015653039.549, "dur": 0.729, + "args": { + "External id": 2937555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015653041.698, "dur": 0.614, + "args": { + "External id": 2937556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015653063.191, "dur": 17396.139, + "args": { + "External id": 2937557,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015653078.634, "dur": 17374.325, + "args": { + "External id": 2937558,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015653097.085, "dur": 13.446, + "args": { + "External id": 2937559,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015653115.116, "dur": 17305.338, + "args": { + "External id": 2937560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015653117.587, "dur": 17302.364, + "args": { + "External id": 2937561,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015653126.848, "dur": 5.486, + "args": { + "External id": 2937562,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015653134.006, "dur": 17283.151, + "args": { + "External id": 2937563,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015670637.833, "dur": 36.689, + "args": { + "External id": 2937564,"Sequence number": 29294599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7539 + } + }, + { + "ph": "s", "id": 189, "pid": 1336755, "tid": 1336755, "ts": 1555015670637.833, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015670661.441, "dur": 8.064, + "args": { + "External id": 2937565,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015670664.988, "dur": 4.335, + "args": { + "External id": 2937566,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015670739.477, "dur": 74.145, + "args": { + "External id": 2937567,"Record function id": 0, "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015670815.487, "dur": 1034.390, + "args": { + "External id": 2937568,"Record function id": 0, "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015670854.676, "dur": 982.258, + "args": { + "External id": 2937569,"Sequence number": 29294600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7544 + } + }, + { + "ph": "s", "id": 188, "pid": 1336755, "tid": 1336755, "ts": 1555015670854.676, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015670918.160, "dur": 38.190, + "args": { + "External id": 2937570,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015670968.186, "dur": 131.663, + "args": { + "External id": 2937571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015671113.876, "dur": 50.967, + "args": { + "External id": 2937572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015671176.079, "dur": 35.052, + "args": { + "External id": 2937573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015671236.192, "dur": 26.151, + "args": { + "External id": 2937574,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015671280.400, "dur": 15.105, + "args": { + "External id": 2937575,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015671313.703, "dur": 123.186, + "args": { + "External id": 2937576,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015671361.714, "dur": 11.158, + "args": { + "External id": 2937577,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015671366.583, "dur": 5.447, + "args": { + "External id": 2937578,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015671375.673, "dur": 5.247, + "args": { + "External id": 2937579,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015671382.091, "dur": 1.169, + "args": { + "External id": 2937580,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015671385.756, "dur": 3.175, + "args": { + "External id": 2937581,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015671447.384, "dur": 47.003, + "args": { + "External id": 2937582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015671522.331, "dur": 28.312, + "args": { + "External id": 2937583,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015671559.027, "dur": 41.486, + "args": { + "External id": 2937584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015671608.562, "dur": 35.337, + "args": { + "External id": 2937585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015671664.005, "dur": 27.504, + "args": { + "External id": 2937586,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015671696.808, "dur": 34.232, + "args": { + "External id": 2937587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015671751.434, "dur": 18.097, + "args": { + "External id": 2937588,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 1336755, "tid": 1336755, + "ts": 1555015671912.069, "dur": 110.136, + "args": { + "External id": 2937589,"Record function id": 0, "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015672098.019, "dur": 60.455, + "args": { + "External id": 2937590,"Record function id": 0, "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 1336755, "tid": 1336755, + "ts": 1555015672170.210, "dur": 18781.800, + "args": { + "External id": 2937591,"Record function id": 0, "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 1336755, "tid": 1336755, + "ts": 1555015672180.647, "dur": 855.152, + "args": { + "External id": 2937592,"Record function id": 0, "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015672259.721, "dur": 8.057, + "args": { + "External id": 2937593,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015672280.290, "dur": 34.482, + "args": { + "External id": 2937594,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672285.520, "dur": 2.034, + "args": { + "External id": 2937595,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672291.934, "dur": 0.256, + "args": { + "External id": 2937596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672293.580, "dur": 0.467, + "args": { + "External id": 2937597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672295.257, "dur": 0.401, + "args": { + "External id": 2937598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672298.367, "dur": 0.289, + "args": { + "External id": 2937599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672299.845, "dur": 0.435, + "args": { + "External id": 2937600,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672301.612, "dur": 2.997, + "args": { + "External id": 2937601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672306.269, "dur": 0.530, + "args": { + "External id": 2937602,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672308.071, "dur": 0.242, + "args": { + "External id": 2937603,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015672325.430, "dur": 40.699, + "args": { + "External id": 2937604,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015672399.298, "dur": 109.123, + "args": { + "External id": 2937605,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015672409.938, "dur": 4.400, + "args": { + "External id": 2937606,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015672419.153, "dur": 10.249, + "args": { + "External id": 2937607,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015672423.669, "dur": 5.337, + "args": { + "External id": 2937608,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672427.311, "dur": 0.442, + "args": { + "External id": 2937609,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015672436.150, "dur": 30.373, + "args": { + "External id": 2937610,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672438.171, "dur": 2.095, + "args": { + "External id": 2937611,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672442.018, "dur": 0.423, + "args": { + "External id": 2937612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672446.489, "dur": 0.183, + "args": { + "External id": 2937613,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672449.621, "dur": 0.905, + "args": { + "External id": 2937614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672452.084, "dur": 0.267, + "args": { + "External id": 2937615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672454.091, "dur": 1.661, + "args": { + "External id": 2937616,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672456.941, "dur": 0.329, + "args": { + "External id": 2937617,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672458.650, "dur": 0.158, + "args": { + "External id": 2937618,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015672461.712, "dur": 0.179, + "args": { + "External id": 2937619,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015672477.737, "dur": 23.067, + "args": { + "External id": 2937620,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015672559.498, "dur": 351.775, + "args": { + "External id": 2937621,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015672588.636, "dur": 317.915, + "args": { + "External id": 2937622,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7597, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015672598.631, "dur": 302.820, + "args": { + "External id": 2937623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015672932.696, "dur": 2.728, + "args": { + "External id": 2937624,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7599, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 1336755, "tid": 1336755, + "ts": 1555015673058.239, "dur": 17699.021, + "args": { + "External id": 2937625,"Record function id": 0, "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673178.148, "dur": 6.908, + "args": { + "External id": 2937626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673189.083, "dur": 1.142, + "args": { + "External id": 2937627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673191.973, "dur": 1.855, + "args": { + "External id": 2937628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673195.507, "dur": 1.116, + "args": { + "External id": 2937629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673198.107, "dur": 0.769, + "args": { + "External id": 2937630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673200.009, "dur": 0.749, + "args": { + "External id": 2937631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673203.805, "dur": 1.070, + "args": { + "External id": 2937632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673206.571, "dur": 1.759, + "args": { + "External id": 2937633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673209.919, "dur": 0.553, + "args": { + "External id": 2937634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015673211.902, "dur": 0.702, + "args": { + "External id": 2937635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015673233.747, "dur": 17484.740, + "args": { + "External id": 2937636,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015673249.589, "dur": 17462.516, + "args": { + "External id": 2937637,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015673272.181, "dur": 12.369, + "args": { + "External id": 2937638,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015673287.301, "dur": 17392.581, + "args": { + "External id": 2937639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015673289.639, "dur": 17389.753, + "args": { + "External id": 2937640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015673295.236, "dur": 5.185, + "args": { + "External id": 2937641,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015673302.060, "dur": 17374.470, + "args": { + "External id": 2937642,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015690888.802, "dur": 38.285, + "args": { + "External id": 2937643,"Sequence number": 29294601, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7618 + } + }, + { + "ph": "s", "id": 187, "pid": 1336755, "tid": 1336755, "ts": 1555015690888.802, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015690915.003, "dur": 7.078, + "args": { + "External id": 2937644,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015690917.885, "dur": 3.972, + "args": { + "External id": 2937645,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015691015.863, "dur": 77.043, + "args": { + "External id": 2937646,"Record function id": 0, "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015691095.086, "dur": 1064.408, + "args": { + "External id": 2937647,"Record function id": 0, "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015691136.113, "dur": 995.222, + "args": { + "External id": 2937648,"Sequence number": 29294602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7623 + } + }, + { + "ph": "s", "id": 186, "pid": 1336755, "tid": 1336755, "ts": 1555015691136.113, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015691215.150, "dur": 44.329, + "args": { + "External id": 2937649,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691272.116, "dur": 103.322, + "args": { + "External id": 2937650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691387.239, "dur": 44.384, + "args": { + "External id": 2937651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691437.901, "dur": 32.216, + "args": { + "External id": 2937652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015691495.491, "dur": 24.845, + "args": { + "External id": 2937653,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015691534.832, "dur": 14.836, + "args": { + "External id": 2937654,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015691567.123, "dur": 124.150, + "args": { + "External id": 2937655,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015691614.369, "dur": 11.141, + "args": { + "External id": 2937656,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015691619.423, "dur": 5.327, + "args": { + "External id": 2937657,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015691628.234, "dur": 5.867, + "args": { + "External id": 2937658,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015691635.572, "dur": 1.158, + "args": { + "External id": 2937659,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015691641.146, "dur": 4.070, + "args": { + "External id": 2937660,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691701.141, "dur": 46.507, + "args": { + "External id": 2937661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015691774.410, "dur": 27.732, + "args": { + "External id": 2937662,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691810.658, "dur": 42.654, + "args": { + "External id": 2937663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691860.016, "dur": 35.762, + "args": { + "External id": 2937664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015691918.222, "dur": 27.380, + "args": { + "External id": 2937665,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015691951.110, "dur": 70.224, + "args": { + "External id": 2937666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015692044.323, "dur": 19.828, + "args": { + "External id": 2937667,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 1336755, "tid": 1336755, + "ts": 1555015692225.727, "dur": 74.767, + "args": { + "External id": 2937668,"Record function id": 0, "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015692371.571, "dur": 44.903, + "args": { + "External id": 2937669,"Record function id": 0, "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 1336755, "tid": 1336755, + "ts": 1555015692425.283, "dur": 18853.531, + "args": { + "External id": 2937670,"Record function id": 0, "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 1336755, "tid": 1336755, + "ts": 1555015692432.918, "dur": 969.635, + "args": { + "External id": 2937671,"Record function id": 0, "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015692510.780, "dur": 8.500, + "args": { + "External id": 2937672,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015692531.996, "dur": 36.897, + "args": { + "External id": 2937673,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692537.297, "dur": 2.512, + "args": { + "External id": 2937674,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692544.063, "dur": 0.204, + "args": { + "External id": 2937675,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692545.889, "dur": 0.536, + "args": { + "External id": 2937676,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692548.069, "dur": 0.605, + "args": { + "External id": 2937677,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692552.106, "dur": 0.371, + "args": { + "External id": 2937678,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692554.107, "dur": 0.407, + "args": { + "External id": 2937679,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692556.067, "dur": 2.632, + "args": { + "External id": 2937680,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692559.953, "dur": 0.342, + "args": { + "External id": 2937681,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692561.769, "dur": 0.310, + "args": { + "External id": 2937682,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015692578.993, "dur": 38.876, + "args": { + "External id": 2937683,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015692649.436, "dur": 107.715, + "args": { + "External id": 2937684,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015692659.752, "dur": 4.073, + "args": { + "External id": 2937685,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015692668.731, "dur": 9.724, + "args": { + "External id": 2937686,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015692672.642, "dur": 5.391, + "args": { + "External id": 2937687,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692676.270, "dur": 0.439, + "args": { + "External id": 2937688,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015692685.453, "dur": 28.790, + "args": { + "External id": 2937689,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692687.700, "dur": 1.869, + "args": { + "External id": 2937690,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692690.944, "dur": 0.257, + "args": { + "External id": 2937691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692692.865, "dur": 0.172, + "args": { + "External id": 2937692,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692696.047, "dur": 1.316, + "args": { + "External id": 2937693,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692698.810, "dur": 0.368, + "args": { + "External id": 2937694,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692700.583, "dur": 0.161, + "args": { + "External id": 2937695,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692704.036, "dur": 0.516, + "args": { + "External id": 2937696,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692705.984, "dur": 0.498, + "args": { + "External id": 2937697,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015692707.664, "dur": 1.794, + "args": { + "External id": 2937698,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015692726.294, "dur": 23.403, + "args": { + "External id": 2937699,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015692815.447, "dur": 483.595, + "args": { + "External id": 2937700,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015692846.416, "dur": 446.742, + "args": { + "External id": 2937701,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7676, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015692856.247, "dur": 430.523, + "args": { + "External id": 2937702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015693325.215, "dur": 2.604, + "args": { + "External id": 2937703,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7678, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 1336755, "tid": 1336755, + "ts": 1555015693423.833, "dur": 17651.544, + "args": { + "External id": 2937704,"Record function id": 0, "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693526.730, "dur": 6.309, + "args": { + "External id": 2937705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693536.670, "dur": 1.036, + "args": { + "External id": 2937706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693539.321, "dur": 2.046, + "args": { + "External id": 2937707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693543.391, "dur": 1.003, + "args": { + "External id": 2937708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693545.655, "dur": 0.655, + "args": { + "External id": 2937709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693547.641, "dur": 1.127, + "args": { + "External id": 2937710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693552.014, "dur": 0.905, + "args": { + "External id": 2937711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693554.429, "dur": 2.164, + "args": { + "External id": 2937712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693558.275, "dur": 0.782, + "args": { + "External id": 2937713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015693560.643, "dur": 1.093, + "args": { + "External id": 2937714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015693581.396, "dur": 17457.350, + "args": { + "External id": 2937715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015693597.256, "dur": 17434.801, + "args": { + "External id": 2937716,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015693618.815, "dur": 13.162, + "args": { + "External id": 2937717,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015693635.379, "dur": 17341.501, + "args": { + "External id": 2937718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015693637.914, "dur": 17338.421, + "args": { + "External id": 2937719,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015693643.360, "dur": 4.872, + "args": { + "External id": 2937720,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015693649.660, "dur": 17324.035, + "args": { + "External id": 2937721,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015711219.260, "dur": 35.128, + "args": { + "External id": 2937722,"Sequence number": 29294603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7697 + } + }, + { + "ph": "s", "id": 185, "pid": 1336755, "tid": 1336755, "ts": 1555015711219.260, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015711241.655, "dur": 7.834, + "args": { + "External id": 2937723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015711244.607, "dur": 4.489, + "args": { + "External id": 2937724,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015711314.172, "dur": 75.049, + "args": { + "External id": 2937725,"Record function id": 0, "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015711390.802, "dur": 1069.681, + "args": { + "External id": 2937726,"Record function id": 0, "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015711430.071, "dur": 1016.958, + "args": { + "External id": 2937727,"Sequence number": 29294604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7702 + } + }, + { + "ph": "s", "id": 184, "pid": 1336755, "tid": 1336755, "ts": 1555015711430.071, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015711491.896, "dur": 39.657, + "args": { + "External id": 2937728,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015711542.611, "dur": 108.649, + "args": { + "External id": 2937729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015711660.822, "dur": 39.425, + "args": { + "External id": 2937730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015711708.747, "dur": 31.995, + "args": { + "External id": 2937731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015711764.504, "dur": 25.142, + "args": { + "External id": 2937732,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015711806.151, "dur": 15.194, + "args": { + "External id": 2937733,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015711837.631, "dur": 119.938, + "args": { + "External id": 2937734,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015711883.065, "dur": 11.411, + "args": { + "External id": 2937735,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015711888.072, "dur": 5.692, + "args": { + "External id": 2937736,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015711897.253, "dur": 5.296, + "args": { + "External id": 2937737,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015711903.984, "dur": 1.202, + "args": { + "External id": 2937738,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015711907.498, "dur": 3.710, + "args": { + "External id": 2937739,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015711967.753, "dur": 95.952, + "args": { + "External id": 2937740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015712097.947, "dur": 31.926, + "args": { + "External id": 2937741,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015712138.353, "dur": 62.064, + "args": { + "External id": 2937742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015712212.174, "dur": 37.237, + "args": { + "External id": 2937743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015712271.845, "dur": 28.222, + "args": { + "External id": 2937744,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015712305.123, "dur": 35.331, + "args": { + "External id": 2937745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015712360.396, "dur": 17.246, + "args": { + "External id": 2937746,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 1336755, "tid": 1336755, + "ts": 1555015712523.072, "dur": 75.573, + "args": { + "External id": 2937747,"Record function id": 0, "Ev Idx": 7722 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015712668.966, "dur": 45.680, + "args": { + "External id": 2937748,"Record function id": 0, "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 1336755, "tid": 1336755, + "ts": 1555015712723.896, "dur": 18650.520, + "args": { + "External id": 2937749,"Record function id": 0, "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 1336755, "tid": 1336755, + "ts": 1555015712731.599, "dur": 904.239, + "args": { + "External id": 2937750,"Record function id": 0, "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015712812.058, "dur": 7.167, + "args": { + "External id": 2937751,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015712831.624, "dur": 38.091, + "args": { + "External id": 2937752,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712836.765, "dur": 2.436, + "args": { + "External id": 2937753,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712843.988, "dur": 0.610, + "args": { + "External id": 2937754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712846.335, "dur": 0.321, + "args": { + "External id": 2937755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712848.210, "dur": 0.523, + "args": { + "External id": 2937756,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712851.720, "dur": 0.533, + "args": { + "External id": 2937757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712853.832, "dur": 0.389, + "args": { + "External id": 2937758,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712855.654, "dur": 3.135, + "args": { + "External id": 2937759,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712860.796, "dur": 0.406, + "args": { + "External id": 2937760,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712862.846, "dur": 0.292, + "args": { + "External id": 2937761,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015712880.750, "dur": 35.690, + "args": { + "External id": 2937762,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015712948.860, "dur": 181.693, + "args": { + "External id": 2937763,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015712958.761, "dur": 4.112, + "args": { + "External id": 2937764,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015712967.969, "dur": 9.619, + "args": { + "External id": 2937765,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015712972.171, "dur": 5.029, + "args": { + "External id": 2937766,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015712975.483, "dur": 0.528, + "args": { + "External id": 2937767,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015713045.443, "dur": 35.871, + "args": { + "External id": 2937768,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713048.203, "dur": 2.456, + "args": { + "External id": 2937769,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713053.160, "dur": 0.558, + "args": { + "External id": 2937770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713055.344, "dur": 0.513, + "args": { + "External id": 2937771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713059.223, "dur": 1.595, + "args": { + "External id": 2937772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713062.486, "dur": 0.388, + "args": { + "External id": 2937773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713064.489, "dur": 0.426, + "args": { + "External id": 2937774,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713067.873, "dur": 0.186, + "args": { + "External id": 2937775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713069.603, "dur": 0.376, + "args": { + "External id": 2937776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713074.745, "dur": 1.766, + "args": { + "External id": 2937777,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015713093.540, "dur": 28.177, + "args": { + "External id": 2937778,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015713200.734, "dur": 349.754, + "args": { + "External id": 2937779,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015713231.760, "dur": 314.149, + "args": { + "External id": 2937780,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7755, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015713242.598, "dur": 298.173, + "args": { + "External id": 2937781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015713571.406, "dur": 2.300, + "args": { + "External id": 2937782,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7757, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 1336755, "tid": 1336755, + "ts": 1555015713655.555, "dur": 17517.382, + "args": { + "External id": 2937783,"Record function id": 0, "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713753.354, "dur": 6.134, + "args": { + "External id": 2937784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713762.616, "dur": 1.156, + "args": { + "External id": 2937785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713765.609, "dur": 2.783, + "args": { + "External id": 2937786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713769.973, "dur": 0.688, + "args": { + "External id": 2937787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713771.908, "dur": 0.926, + "args": { + "External id": 2937788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713774.176, "dur": 0.765, + "args": { + "External id": 2937789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713777.851, "dur": 1.094, + "args": { + "External id": 2937790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713780.401, "dur": 2.142, + "args": { + "External id": 2937791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713784.323, "dur": 0.686, + "args": { + "External id": 2937792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015713786.467, "dur": 0.962, + "args": { + "External id": 2937793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015713807.204, "dur": 17311.072, + "args": { + "External id": 2937794,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015713827.961, "dur": 17283.248, + "args": { + "External id": 2937795,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015713845.971, "dur": 12.640, + "args": { + "External id": 2937796,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015713861.416, "dur": 17215.756, + "args": { + "External id": 2937797,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015713863.919, "dur": 17212.515, + "args": { + "External id": 2937798,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015713869.150, "dur": 5.449, + "args": { + "External id": 2937799,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015713876.462, "dur": 17196.844, + "args": { + "External id": 2937800,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015731313.202, "dur": 36.247, + "args": { + "External id": 2937801,"Sequence number": 29294605, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7776 + } + }, + { + "ph": "s", "id": 183, "pid": 1336755, "tid": 1336755, "ts": 1555015731313.202, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015731336.036, "dur": 8.813, + "args": { + "External id": 2937802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015731339.896, "dur": 4.702, + "args": { + "External id": 2937803,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015731410.511, "dur": 73.403, + "args": { + "External id": 2937804,"Record function id": 0, "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015731485.707, "dur": 1065.721, + "args": { + "External id": 2937805,"Record function id": 0, "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015731525.284, "dur": 1013.852, + "args": { + "External id": 2937806,"Sequence number": 29294606, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7781 + } + }, + { + "ph": "s", "id": 182, "pid": 1336755, "tid": 1336755, "ts": 1555015731525.284, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015731586.309, "dur": 43.717, + "args": { + "External id": 2937807,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015731642.438, "dur": 103.718, + "args": { + "External id": 2937808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015731755.583, "dur": 39.491, + "args": { + "External id": 2937809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015731803.649, "dur": 31.505, + "args": { + "External id": 2937810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015731858.911, "dur": 24.197, + "args": { + "External id": 2937811,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015731898.854, "dur": 13.701, + "args": { + "External id": 2937812,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015731928.696, "dur": 175.337, + "args": { + "External id": 2937813,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015731973.205, "dur": 60.208, + "args": { + "External id": 2937814,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015731978.252, "dur": 53.794, + "args": { + "External id": 2937815,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015732037.358, "dur": 5.617, + "args": { + "External id": 2937816,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015732044.166, "dur": 1.285, + "args": { + "External id": 2937817,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015732048.088, "dur": 3.886, + "args": { + "External id": 2937818,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015732114.698, "dur": 68.961, + "args": { + "External id": 2937819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015732217.043, "dur": 30.403, + "args": { + "External id": 2937820,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015732256.312, "dur": 44.496, + "args": { + "External id": 2937821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015732308.611, "dur": 35.716, + "args": { + "External id": 2937822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015732366.099, "dur": 29.148, + "args": { + "External id": 2937823,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015732400.295, "dur": 35.078, + "args": { + "External id": 2937824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015732453.922, "dur": 17.801, + "args": { + "External id": 2937825,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 1336755, "tid": 1336755, + "ts": 1555015732613.184, "dur": 73.484, + "args": { + "External id": 2937826,"Record function id": 0, "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015732756.919, "dur": 44.115, + "args": { + "External id": 2937827,"Record function id": 0, "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 1336755, "tid": 1336755, + "ts": 1555015732810.514, "dur": 18956.182, + "args": { + "External id": 2937828,"Record function id": 0, "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 1336755, "tid": 1336755, + "ts": 1555015732818.506, "dur": 920.162, + "args": { + "External id": 2937829,"Record function id": 0, "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015732897.184, "dur": 7.132, + "args": { + "External id": 2937830,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015732916.980, "dur": 35.941, + "args": { + "External id": 2937831,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732921.976, "dur": 2.155, + "args": { + "External id": 2937832,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732928.909, "dur": 0.324, + "args": { + "External id": 2937833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732930.649, "dur": 0.355, + "args": { + "External id": 2937834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732932.691, "dur": 0.298, + "args": { + "External id": 2937835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732935.884, "dur": 0.416, + "args": { + "External id": 2937836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732937.647, "dur": 0.446, + "args": { + "External id": 2937837,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732939.533, "dur": 2.973, + "args": { + "External id": 2937838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732944.221, "dur": 0.182, + "args": { + "External id": 2937839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015732946.358, "dur": 0.159, + "args": { + "External id": 2937840,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015732963.459, "dur": 81.509, + "args": { + "External id": 2937841,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015733081.670, "dur": 167.420, + "args": { + "External id": 2937842,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015733093.086, "dur": 4.945, + "args": { + "External id": 2937843,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015733103.470, "dur": 10.537, + "args": { + "External id": 2937844,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015733107.696, "dur": 5.940, + "args": { + "External id": 2937845,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733111.258, "dur": 0.673, + "args": { + "External id": 2937846,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015733121.673, "dur": 46.322, + "args": { + "External id": 2937847,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733124.741, "dur": 1.979, + "args": { + "External id": 2937848,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733128.276, "dur": 0.370, + "args": { + "External id": 2937849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733130.463, "dur": 0.355, + "args": { + "External id": 2937850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733134.378, "dur": 0.982, + "args": { + "External id": 2937851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733136.607, "dur": 0.454, + "args": { + "External id": 2937852,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733138.791, "dur": 0.189, + "args": { + "External id": 2937853,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733156.275, "dur": 0.668, + "args": { + "External id": 2937854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733159.397, "dur": 0.387, + "args": { + "External id": 2937855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733161.185, "dur": 2.138, + "args": { + "External id": 2937856,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015733210.213, "dur": 29.693, + "args": { + "External id": 2937857,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015733304.790, "dur": 351.691, + "args": { + "External id": 2937858,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015733336.143, "dur": 315.830, + "args": { + "External id": 2937859,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7834, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015733347.838, "dur": 299.007, + "args": { + "External id": 2937860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015733676.638, "dur": 2.271, + "args": { + "External id": 2937861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7836, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 1336755, "tid": 1336755, + "ts": 1555015733759.595, "dur": 17812.161, + "args": { + "External id": 2937862,"Record function id": 0, "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733856.860, "dur": 5.274, + "args": { + "External id": 2937863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733865.140, "dur": 1.339, + "args": { + "External id": 2937864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733868.235, "dur": 2.050, + "args": { + "External id": 2937865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733871.966, "dur": 0.904, + "args": { + "External id": 2937866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733874.189, "dur": 0.917, + "args": { + "External id": 2937867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733876.513, "dur": 0.754, + "args": { + "External id": 2937868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733880.288, "dur": 0.820, + "args": { + "External id": 2937869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733882.497, "dur": 1.912, + "args": { + "External id": 2937870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733885.984, "dur": 0.635, + "args": { + "External id": 2937871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015733888.144, "dur": 0.662, + "args": { + "External id": 2937872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015733908.872, "dur": 17624.604, + "args": { + "External id": 2937873,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015733925.097, "dur": 17601.457, + "args": { + "External id": 2937874,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015733944.996, "dur": 12.633, + "args": { + "External id": 2937875,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015733960.376, "dur": 17532.087, + "args": { + "External id": 2937876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015733962.828, "dur": 17529.081, + "args": { + "External id": 2937877,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015733968.582, "dur": 5.209, + "args": { + "External id": 2937878,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015733975.627, "dur": 17513.371, + "args": { + "External id": 2937879,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015751706.089, "dur": 36.210, + "args": { + "External id": 2937880,"Sequence number": 29294607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7855 + } + }, + { + "ph": "s", "id": 181, "pid": 1336755, "tid": 1336755, "ts": 1555015751706.089, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015751730.307, "dur": 7.157, + "args": { + "External id": 2937881,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015751733.397, "dur": 3.867, + "args": { + "External id": 2937882,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015751803.161, "dur": 75.565, + "args": { + "External id": 2937883,"Record function id": 0, "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015751880.169, "dur": 1055.741, + "args": { + "External id": 2937884,"Record function id": 0, "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015751921.059, "dur": 1002.513, + "args": { + "External id": 2937885,"Sequence number": 29294608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7860 + } + }, + { + "ph": "s", "id": 180, "pid": 1336755, "tid": 1336755, "ts": 1555015751921.059, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015752021.399, "dur": 44.020, + "args": { + "External id": 2937886,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752080.072, "dur": 119.995, + "args": { + "External id": 2937887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752213.099, "dur": 42.012, + "args": { + "External id": 2937888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752263.830, "dur": 34.259, + "args": { + "External id": 2937889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015752323.343, "dur": 28.047, + "args": { + "External id": 2937890,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015752370.274, "dur": 16.474, + "args": { + "External id": 2937891,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015752404.263, "dur": 123.824, + "args": { + "External id": 2937892,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015752451.735, "dur": 11.426, + "args": { + "External id": 2937893,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015752456.825, "dur": 5.450, + "args": { + "External id": 2937894,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015752465.928, "dur": 5.597, + "args": { + "External id": 2937895,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015752472.833, "dur": 1.460, + "args": { + "External id": 2937896,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015752476.700, "dur": 3.786, + "args": { + "External id": 2937897,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752537.575, "dur": 45.593, + "args": { + "External id": 2937898,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015752610.090, "dur": 28.247, + "args": { + "External id": 2937899,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752646.730, "dur": 41.586, + "args": { + "External id": 2937900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752696.444, "dur": 35.361, + "args": { + "External id": 2937901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015752753.684, "dur": 25.107, + "args": { + "External id": 2937902,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015752784.000, "dur": 35.126, + "args": { + "External id": 2937903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015752837.350, "dur": 17.058, + "args": { + "External id": 2937904,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 1336755, "tid": 1336755, + "ts": 1555015753034.229, "dur": 73.250, + "args": { + "External id": 2937905,"Record function id": 0, "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015753198.311, "dur": 46.036, + "args": { + "External id": 2937906,"Record function id": 0, "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 1336755, "tid": 1336755, + "ts": 1555015753253.885, "dur": 18591.450, + "args": { + "External id": 2937907,"Record function id": 0, "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 1336755, "tid": 1336755, + "ts": 1555015753261.212, "dur": 916.180, + "args": { + "External id": 2937908,"Record function id": 0, "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015753342.115, "dur": 8.613, + "args": { + "External id": 2937909,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015753363.328, "dur": 33.493, + "args": { + "External id": 2937910,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753368.701, "dur": 2.168, + "args": { + "External id": 2937911,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753374.802, "dur": 0.321, + "args": { + "External id": 2937912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753376.300, "dur": 0.427, + "args": { + "External id": 2937913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753377.774, "dur": 0.654, + "args": { + "External id": 2937914,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753381.364, "dur": 0.389, + "args": { + "External id": 2937915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753382.671, "dur": 0.642, + "args": { + "External id": 2937916,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753384.253, "dur": 3.097, + "args": { + "External id": 2937917,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753388.375, "dur": 0.426, + "args": { + "External id": 2937918,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753389.977, "dur": 0.286, + "args": { + "External id": 2937919,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015753407.754, "dur": 41.485, + "args": { + "External id": 2937920,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015753481.637, "dur": 105.908, + "args": { + "External id": 2937921,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015753492.080, "dur": 3.866, + "args": { + "External id": 2937922,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015753502.648, "dur": 9.937, + "args": { + "External id": 2937923,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015753506.946, "dur": 5.226, + "args": { + "External id": 2937924,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753510.212, "dur": 0.587, + "args": { + "External id": 2937925,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015753519.026, "dur": 26.917, + "args": { + "External id": 2937926,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753521.218, "dur": 1.891, + "args": { + "External id": 2937927,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753524.354, "dur": 0.425, + "args": { + "External id": 2937928,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753526.134, "dur": 0.219, + "args": { + "External id": 2937929,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753529.186, "dur": 1.350, + "args": { + "External id": 2937930,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753531.803, "dur": 0.338, + "args": { + "External id": 2937931,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753533.231, "dur": 0.288, + "args": { + "External id": 2937932,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753536.536, "dur": 0.324, + "args": { + "External id": 2937933,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753538.092, "dur": 0.432, + "args": { + "External id": 2937934,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015753539.585, "dur": 1.712, + "args": { + "External id": 2937935,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015753555.916, "dur": 23.965, + "args": { + "External id": 2937936,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015753635.725, "dur": 430.263, + "args": { + "External id": 2937937,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015753669.783, "dur": 391.080, + "args": { + "External id": 2937938,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7913, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015753679.778, "dur": 373.662, + "args": { + "External id": 2937939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015754089.306, "dur": 2.689, + "args": { + "External id": 2937940,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7915, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 1336755, "tid": 1336755, + "ts": 1555015754200.551, "dur": 17456.009, + "args": { + "External id": 2937941,"Record function id": 0, "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754298.583, "dur": 6.298, + "args": { + "External id": 2937942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754312.937, "dur": 1.351, + "args": { + "External id": 2937943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754316.128, "dur": 2.324, + "args": { + "External id": 2937944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754321.102, "dur": 0.934, + "args": { + "External id": 2937945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754323.158, "dur": 0.841, + "args": { + "External id": 2937946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754325.184, "dur": 0.853, + "args": { + "External id": 2937947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754327.384, "dur": 0.827, + "args": { + "External id": 2937948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754331.052, "dur": 2.276, + "args": { + "External id": 2937949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754334.770, "dur": 0.804, + "args": { + "External id": 2937950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015754336.794, "dur": 0.558, + "args": { + "External id": 2937951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015754355.538, "dur": 17262.018, + "args": { + "External id": 2937952,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015754370.557, "dur": 17240.226, + "args": { + "External id": 2937953,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015754388.440, "dur": 12.766, + "args": { + "External id": 2937954,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015754405.646, "dur": 17171.445, + "args": { + "External id": 2937955,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015754408.159, "dur": 17168.407, + "args": { + "External id": 2937956,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015754413.659, "dur": 5.210, + "args": { + "External id": 2937957,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015754420.411, "dur": 17153.789, + "args": { + "External id": 2937958,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015771786.386, "dur": 34.108, + "args": { + "External id": 2937959,"Sequence number": 29294609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7934 + } + }, + { + "ph": "s", "id": 179, "pid": 1336755, "tid": 1336755, "ts": 1555015771786.386, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015771808.386, "dur": 7.167, + "args": { + "External id": 2937960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015771811.348, "dur": 3.936, + "args": { + "External id": 2937961,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015771882.386, "dur": 77.129, + "args": { + "External id": 2937962,"Record function id": 0, "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015771960.965, "dur": 1090.172, + "args": { + "External id": 2937963,"Record function id": 0, "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015772036.688, "dur": 999.907, + "args": { + "External id": 2937964,"Sequence number": 29294610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7939 + } + }, + { + "ph": "s", "id": 178, "pid": 1336755, "tid": 1336755, "ts": 1555015772036.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015772103.039, "dur": 56.361, + "args": { + "External id": 2937965,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772174.635, "dur": 105.846, + "args": { + "External id": 2937966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772290.483, "dur": 41.736, + "args": { + "External id": 2937967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772340.192, "dur": 32.391, + "args": { + "External id": 2937968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015772395.343, "dur": 27.045, + "args": { + "External id": 2937969,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015772440.977, "dur": 16.440, + "args": { + "External id": 2937970,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015772474.899, "dur": 120.705, + "args": { + "External id": 2937971,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015772520.415, "dur": 10.980, + "args": { + "External id": 2937972,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015772525.437, "dur": 5.169, + "args": { + "External id": 2937973,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015772534.122, "dur": 5.170, + "args": { + "External id": 2937974,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015772540.678, "dur": 1.049, + "args": { + "External id": 2937975,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015772543.895, "dur": 3.888, + "args": { + "External id": 2937976,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772605.182, "dur": 45.542, + "args": { + "External id": 2937977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015772678.531, "dur": 27.131, + "args": { + "External id": 2937978,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772713.841, "dur": 41.871, + "args": { + "External id": 2937979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772763.614, "dur": 35.829, + "args": { + "External id": 2937980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015772828.908, "dur": 26.713, + "args": { + "External id": 2937981,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015772863.035, "dur": 37.008, + "args": { + "External id": 2937982,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015772918.025, "dur": 17.264, + "args": { + "External id": 2937983,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 1336755, "tid": 1336755, + "ts": 1555015773116.813, "dur": 90.300, + "args": { + "External id": 2937984,"Record function id": 0, "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015773281.447, "dur": 46.712, + "args": { + "External id": 2937985,"Record function id": 0, "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 1336755, "tid": 1336755, + "ts": 1555015773337.839, "dur": 18725.794, + "args": { + "External id": 2937986,"Record function id": 0, "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 1336755, "tid": 1336755, + "ts": 1555015773347.083, "dur": 888.483, + "args": { + "External id": 2937987,"Record function id": 0, "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015773427.833, "dur": 8.254, + "args": { + "External id": 2937988,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015773448.459, "dur": 31.919, + "args": { + "External id": 2937989,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773453.937, "dur": 2.035, + "args": { + "External id": 2937990,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773459.680, "dur": 0.200, + "args": { + "External id": 2937991,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773460.994, "dur": 0.339, + "args": { + "External id": 2937992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773462.480, "dur": 0.294, + "args": { + "External id": 2937993,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773465.465, "dur": 0.441, + "args": { + "External id": 2937994,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773467.545, "dur": 0.283, + "args": { + "External id": 2937995,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773468.978, "dur": 2.556, + "args": { + "External id": 2937996,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773472.913, "dur": 0.232, + "args": { + "External id": 2937997,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773474.105, "dur": 0.277, + "args": { + "External id": 2937998,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015773490.624, "dur": 42.230, + "args": { + "External id": 2937999,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015773564.078, "dur": 108.661, + "args": { + "External id": 2938000,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015773574.561, "dur": 4.138, + "args": { + "External id": 2938001,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015773584.106, "dur": 9.719, + "args": { + "External id": 2938002,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015773588.295, "dur": 5.115, + "args": { + "External id": 2938003,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773591.664, "dur": 0.583, + "args": { + "External id": 2938004,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015773600.604, "dur": 27.628, + "args": { + "External id": 2938005,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773602.766, "dur": 2.045, + "args": { + "External id": 2938006,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773606.299, "dur": 0.792, + "args": { + "External id": 2938007,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773608.222, "dur": 0.626, + "args": { + "External id": 2938008,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773612.117, "dur": 0.339, + "args": { + "External id": 2938009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773613.622, "dur": 0.667, + "args": { + "External id": 2938010,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773615.427, "dur": 0.649, + "args": { + "External id": 2938011,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773618.663, "dur": 0.610, + "args": { + "External id": 2938012,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773620.401, "dur": 0.514, + "args": { + "External id": 2938013,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015773621.714, "dur": 2.081, + "args": { + "External id": 2938014,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015773639.537, "dur": 25.767, + "args": { + "External id": 2938015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015773723.083, "dur": 400.834, + "args": { + "External id": 2938016,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015773751.855, "dur": 367.210, + "args": { + "External id": 2938017,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7992, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015773762.351, "dur": 350.855, + "args": { + "External id": 2938018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015774162.099, "dur": 3.472, + "args": { + "External id": 2938019,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7994, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 1336755, "tid": 1336755, + "ts": 1555015774256.291, "dur": 17577.733, + "args": { + "External id": 2938020,"Record function id": 0, "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774354.790, "dur": 6.852, + "args": { + "External id": 2938021,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774365.220, "dur": 1.267, + "args": { + "External id": 2938022,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774368.189, "dur": 1.211, + "args": { + "External id": 2938023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774370.943, "dur": 1.364, + "args": { + "External id": 2938024,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774373.579, "dur": 0.781, + "args": { + "External id": 2938025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774375.652, "dur": 0.965, + "args": { + "External id": 2938026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774380.259, "dur": 0.873, + "args": { + "External id": 2938027,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774383.209, "dur": 2.043, + "args": { + "External id": 2938028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774386.626, "dur": 1.682, + "args": { + "External id": 2938029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015774389.629, "dur": 1.301, + "args": { + "External id": 2938030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015774409.447, "dur": 17387.475, + "args": { + "External id": 2938031,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015774424.286, "dur": 17366.565, + "args": { + "External id": 2938032,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015774447.971, "dur": 12.035, + "args": { + "External id": 2938033,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015774462.817, "dur": 17296.807, + "args": { + "External id": 2938034,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015774465.600, "dur": 17293.477, + "args": { + "External id": 2938035,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015774470.863, "dur": 5.219, + "args": { + "External id": 2938036,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015774477.914, "dur": 17278.550, + "args": { + "External id": 2938037,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015791964.546, "dur": 72.878, + "args": { + "External id": 2938038,"Sequence number": 29294611, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 8013 + } + }, + { + "ph": "s", "id": 177, "pid": 1336755, "tid": 1336755, "ts": 1555015791964.546, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015792023.950, "dur": 7.876, + "args": { + "External id": 2938039,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015792027.057, "dur": 4.358, + "args": { + "External id": 2938040,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015792100.191, "dur": 89.051, + "args": { + "External id": 2938041,"Record function id": 0, "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015792192.208, "dur": 1058.655, + "args": { + "External id": 2938042,"Record function id": 0, "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015792235.131, "dur": 1002.730, + "args": { + "External id": 2938043,"Sequence number": 29294612, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 8018 + } + }, + { + "ph": "s", "id": 176, "pid": 1336755, "tid": 1336755, "ts": 1555015792235.131, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015792301.093, "dur": 43.510, + "args": { + "External id": 2938044,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015792357.056, "dur": 106.523, + "args": { + "External id": 2938045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015792472.631, "dur": 38.434, + "args": { + "External id": 2938046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015792519.020, "dur": 32.230, + "args": { + "External id": 2938047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015792574.309, "dur": 23.862, + "args": { + "External id": 2938048,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015792613.656, "dur": 14.315, + "args": { + "External id": 2938049,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015792645.505, "dur": 122.458, + "args": { + "External id": 2938050,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015792693.348, "dur": 10.910, + "args": { + "External id": 2938051,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015792698.228, "dur": 5.179, + "args": { + "External id": 2938052,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015792706.846, "dur": 5.242, + "args": { + "External id": 2938053,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015792713.428, "dur": 1.727, + "args": { + "External id": 2938054,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015792717.710, "dur": 2.544, + "args": { + "External id": 2938055,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015792778.444, "dur": 50.590, + "args": { + "External id": 2938056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015792857.335, "dur": 26.296, + "args": { + "External id": 2938057,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015792890.996, "dur": 41.534, + "args": { + "External id": 2938058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015792941.355, "dur": 34.851, + "args": { + "External id": 2938059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015793038.832, "dur": 27.711, + "args": { + "External id": 2938060,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015793074.269, "dur": 39.663, + "args": { + "External id": 2938061,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015793134.146, "dur": 32.749, + "args": { + "External id": 2938062,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 1336755, "tid": 1336755, + "ts": 1555015793316.079, "dur": 75.639, + "args": { + "External id": 2938063,"Record function id": 0, "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015793464.824, "dur": 46.382, + "args": { + "External id": 2938064,"Record function id": 0, "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.26)", "pid": 1336755, "tid": 1336755, + "ts": 1555015793520.408, "dur": 18734.212, + "args": { + "External id": 2938065,"Record function id": 0, "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 1336755, "tid": 1336755, + "ts": 1555015793529.381, "dur": 900.803, + "args": { + "External id": 2938066,"Record function id": 0, "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015793610.785, "dur": 7.935, + "args": { + "External id": 2938067,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015793630.820, "dur": 31.963, + "args": { + "External id": 2938068,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793635.745, "dur": 2.140, + "args": { + "External id": 2938069,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793641.894, "dur": 0.524, + "args": { + "External id": 2938070,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793643.810, "dur": 0.736, + "args": { + "External id": 2938071,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793645.396, "dur": 0.764, + "args": { + "External id": 2938072,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793648.651, "dur": 0.583, + "args": { + "External id": 2938073,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793650.181, "dur": 0.530, + "args": { + "External id": 2938074,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793651.867, "dur": 1.692, + "args": { + "External id": 2938075,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793654.863, "dur": 0.440, + "args": { + "External id": 2938076,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793656.300, "dur": 0.432, + "args": { + "External id": 2938077,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015793672.717, "dur": 38.176, + "args": { + "External id": 2938078,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015793742.241, "dur": 110.727, + "args": { + "External id": 2938079,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015793752.741, "dur": 4.334, + "args": { + "External id": 2938080,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015793762.373, "dur": 9.726, + "args": { + "External id": 2938081,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015793766.466, "dur": 5.253, + "args": { + "External id": 2938082,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793769.648, "dur": 0.704, + "args": { + "External id": 2938083,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015793778.902, "dur": 30.522, + "args": { + "External id": 2938084,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793781.111, "dur": 2.152, + "args": { + "External id": 2938085,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793784.677, "dur": 0.602, + "args": { + "External id": 2938086,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793789.087, "dur": 0.771, + "args": { + "External id": 2938087,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793792.516, "dur": 0.626, + "args": { + "External id": 2938088,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793794.335, "dur": 0.560, + "args": { + "External id": 2938089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793796.037, "dur": 1.775, + "args": { + "External id": 2938090,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793798.955, "dur": 0.687, + "args": { + "External id": 2938091,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793800.921, "dur": 0.611, + "args": { + "External id": 2938092,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015793804.470, "dur": 0.734, + "args": { + "External id": 2938093,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015793820.482, "dur": 24.715, + "args": { + "External id": 2938094,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015793902.470, "dur": 429.373, + "args": { + "External id": 2938095,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015793931.595, "dur": 394.840, + "args": { + "External id": 2938096,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8071, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015793941.132, "dur": 379.050, + "args": { + "External id": 2938097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015794356.131, "dur": 2.260, + "args": { + "External id": 2938098,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8073, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 1336755, "tid": 1336755, + "ts": 1555015794450.452, "dur": 17585.994, + "args": { + "External id": 2938099,"Record function id": 0, "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794550.948, "dur": 6.988, + "args": { + "External id": 2938100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794561.346, "dur": 1.087, + "args": { + "External id": 2938101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794570.231, "dur": 1.226, + "args": { + "External id": 2938102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794574.830, "dur": 0.904, + "args": { + "External id": 2938103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794577.098, "dur": 0.920, + "args": { + "External id": 2938104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794579.484, "dur": 0.983, + "args": { + "External id": 2938105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794581.966, "dur": 1.023, + "args": { + "External id": 2938106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794586.324, "dur": 1.992, + "args": { + "External id": 2938107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794589.630, "dur": 1.284, + "args": { + "External id": 2938108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015794592.160, "dur": 0.879, + "args": { + "External id": 2938109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015794611.960, "dur": 17360.183, + "args": { + "External id": 2938110,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015794626.946, "dur": 17338.656, + "args": { + "External id": 2938111,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015794644.144, "dur": 12.583, + "args": { + "External id": 2938112,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015794661.033, "dur": 17272.041, + "args": { + "External id": 2938113,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015794663.475, "dur": 17268.960, + "args": { + "External id": 2938114,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015794668.674, "dur": 4.930, + "args": { + "External id": 2938115,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015794675.430, "dur": 17254.114, + "args": { + "External id": 2938116,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015812192.922, "dur": 36.550, + "args": { + "External id": 2938117,"Sequence number": 29294613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 8092 + } + }, + { + "ph": "s", "id": 175, "pid": 1336755, "tid": 1336755, "ts": 1555015812192.922, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015812216.088, "dur": 8.207, + "args": { + "External id": 2938118,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015812219.401, "dur": 4.500, + "args": { + "External id": 2938119,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015812290.445, "dur": 73.889, + "args": { + "External id": 2938120,"Record function id": 0, "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015812365.712, "dur": 1054.963, + "args": { + "External id": 2938121,"Record function id": 0, "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015812402.627, "dur": 1005.358, + "args": { + "External id": 2938122,"Sequence number": 29294614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 8097 + } + }, + { + "ph": "s", "id": 174, "pid": 1336755, "tid": 1336755, "ts": 1555015812402.627, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015812467.288, "dur": 43.079, + "args": { + "External id": 2938123,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015812523.110, "dur": 101.276, + "args": { + "External id": 2938124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015812633.458, "dur": 39.030, + "args": { + "External id": 2938125,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015812681.740, "dur": 33.580, + "args": { + "External id": 2938126,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015812738.169, "dur": 24.775, + "args": { + "External id": 2938127,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015812781.662, "dur": 14.752, + "args": { + "External id": 2938128,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015812812.347, "dur": 123.299, + "args": { + "External id": 2938129,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015812860.868, "dur": 10.844, + "args": { + "External id": 2938130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015812865.768, "dur": 5.148, + "args": { + "External id": 2938131,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015812874.707, "dur": 5.429, + "args": { + "External id": 2938132,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015812881.541, "dur": 1.684, + "args": { + "External id": 2938133,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015812885.703, "dur": 2.648, + "args": { + "External id": 2938134,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015812946.314, "dur": 81.960, + "args": { + "External id": 2938135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015813063.089, "dur": 28.529, + "args": { + "External id": 2938136,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015813099.622, "dur": 58.907, + "args": { + "External id": 2938137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015813170.634, "dur": 39.570, + "args": { + "External id": 2938138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015813232.584, "dur": 25.729, + "args": { + "External id": 2938139,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015813264.644, "dur": 36.436, + "args": { + "External id": 2938140,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015813322.635, "dur": 18.660, + "args": { + "External id": 2938141,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.26)", "pid": 1336755, "tid": 1336755, + "ts": 1555015813481.396, "dur": 72.587, + "args": { + "External id": 2938142,"Record function id": 0, "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336755, "tid": 1336755, + "ts": 1555015813624.797, "dur": 45.722, + "args": { + "External id": 2938143,"Record function id": 0, "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.27)", "pid": 1336755, "tid": 1336755, + "ts": 1555015813680.052, "dur": 18654.979, + "args": { + "External id": 2938144,"Record function id": 0, "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 1336755, "tid": 1336755, + "ts": 1555015813687.215, "dur": 920.350, + "args": { + "External id": 2938145,"Record function id": 0, "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015813764.640, "dur": 8.296, + "args": { + "External id": 2938146,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015813785.115, "dur": 33.647, + "args": { + "External id": 2938147,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813790.215, "dur": 2.403, + "args": { + "External id": 2938148,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813796.357, "dur": 0.731, + "args": { + "External id": 2938149,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813798.360, "dur": 0.444, + "args": { + "External id": 2938150,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813800.341, "dur": 0.910, + "args": { + "External id": 2938151,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813804.074, "dur": 0.623, + "args": { + "External id": 2938152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813805.752, "dur": 0.442, + "args": { + "External id": 2938153,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813807.194, "dur": 2.176, + "args": { + "External id": 2938154,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813810.681, "dur": 0.690, + "args": { + "External id": 2938155,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813812.323, "dur": 0.383, + "args": { + "External id": 2938156,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015813830.443, "dur": 38.581, + "args": { + "External id": 2938157,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336755, "tid": 1336755, + "ts": 1555015813901.080, "dur": 147.252, + "args": { + "External id": 2938158,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015813911.582, "dur": 4.027, + "args": { + "External id": 2938159,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336755, "tid": 1336755, + "ts": 1555015813920.697, "dur": 9.638, + "args": { + "External id": 2938160,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015813924.794, "dur": 5.116, + "args": { + "External id": 2938161,"Record function id": 0, "Concrete Inputs": ["", "0", "12846080", "19269120", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813928.088, "dur": 0.713, + "args": { + "External id": 2938162,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336755, "tid": 1336755, + "ts": 1555015813936.300, "dur": 25.210, + "args": { + "External id": 2938163,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813937.707, "dur": 1.979, + "args": { + "External id": 2938164,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "12846080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813940.791, "dur": 0.472, + "args": { + "External id": 2938165,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "12846336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813942.219, "dur": 0.792, + "args": { + "External id": 2938166,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13370624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813946.754, "dur": 0.435, + "args": { + "External id": 2938167,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "13894912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813948.011, "dur": 0.521, + "args": { + "External id": 2938168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14419200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813949.539, "dur": 0.521, + "args": { + "External id": 2938169,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14943488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813952.621, "dur": 0.592, + "args": { + "External id": 2938170,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "14943744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813954.242, "dur": 0.684, + "args": { + "External id": 2938171,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16385536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015813955.641, "dur": 2.095, + "args": { + "External id": 2938172,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "17827328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015813973.268, "dur": 65.148, + "args": { + "External id": 2938173,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336755, "tid": 1336755, + "ts": 1555015814102.898, "dur": 409.775, + "args": { + "External id": 2938174,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015814132.439, "dur": 375.116, + "args": { + "External id": 2938175,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8150, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336755, "tid": 1336755, + "ts": 1555015814157.457, "dur": 344.788, + "args": { + "External id": 2938176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555015814534.606, "dur": 2.515, + "args": { + "External id": 2938177,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8152, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 1336755, "tid": 1336755, + "ts": 1555015814628.881, "dur": 17499.356, + "args": { + "External id": 2938178,"Record function id": 0, "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814741.053, "dur": 6.926, + "args": { + "External id": 2938179,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814751.260, "dur": 1.198, + "args": { + "External id": 2938180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814754.171, "dur": 1.224, + "args": { + "External id": 2938181,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814758.361, "dur": 0.957, + "args": { + "External id": 2938182,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814761.193, "dur": 0.893, + "args": { + "External id": 2938183,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814763.767, "dur": 1.057, + "args": { + "External id": 2938184,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814766.558, "dur": 1.106, + "args": { + "External id": 2938185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814770.674, "dur": 1.967, + "args": { + "External id": 2938186,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814773.932, "dur": 1.092, + "args": { + "External id": 2938187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015814776.225, "dur": 1.059, + "args": { + "External id": 2938188,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015814795.631, "dur": 17294.498, + "args": { + "External id": 2938189,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015814810.563, "dur": 17273.052, + "args": { + "External id": 2938190,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015814827.597, "dur": 12.386, + "args": { + "External id": 2938191,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015814844.390, "dur": 17207.292, + "args": { + "External id": 2938192,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015814847.166, "dur": 17204.036, + "args": { + "External id": 2938193,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015814852.248, "dur": 5.482, + "args": { + "External id": 2938194,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015814859.386, "dur": 17189.197, + "args": { + "External id": 2938195,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015832273.640, "dur": 35.682, + "args": { + "External id": 2938196,"Sequence number": 29294615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 8171 + } + }, + { + "ph": "s", "id": 173, "pid": 1336755, "tid": 1336755, "ts": 1555015832273.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015832297.020, "dur": 7.500, + "args": { + "External id": 2938197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015832299.856, "dur": 4.309, + "args": { + "External id": 2938198,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015832371.235, "dur": 75.452, + "args": { + "External id": 2938199,"Record function id": 0, "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336755, "tid": 1336755, + "ts": 1555015832448.027, "dur": 1069.315, + "args": { + "External id": 2938200,"Record function id": 0, "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015832485.411, "dur": 1018.322, + "args": { + "External id": 2938201,"Sequence number": 29294616, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 8176 + } + }, + { + "ph": "s", "id": 172, "pid": 1336755, "tid": 1336755, "ts": 1555015832485.411, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015832548.788, "dur": 42.158, + "args": { + "External id": 2938202,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015832603.225, "dur": 106.983, + "args": { + "External id": 2938203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015832719.653, "dur": 39.333, + "args": { + "External id": 2938204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015832767.562, "dur": 32.366, + "args": { + "External id": 2938205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015832822.410, "dur": 25.202, + "args": { + "External id": 2938206,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015832865.654, "dur": 14.986, + "args": { + "External id": 2938207,"kernel_hash": "coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oo/coorxmmfbdwsavqf5vwvbytvmpiddkartg54ba2xepdvjwzstlkp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015832896.162, "dur": 165.181, + "args": { + "External id": 2938208,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015832943.468, "dur": 10.494, + "args": { + "External id": 2938209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015832947.725, "dur": 5.494, + "args": { + "External id": 2938210,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015832956.403, "dur": 5.332, + "args": { + "External id": 2938211,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015832963.042, "dur": 1.868, + "args": { + "External id": 2938212,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015832967.222, "dur": 2.799, + "args": { + "External id": 2938213,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015833073.988, "dur": 52.484, + "args": { + "External id": 2938214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336755, "tid": 1336755, + "ts": 1555015833173.747, "dur": 36.829, + "args": { + "External id": 2938215,"kernel_hash": "cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/bx/cbxns3lorv66jtkf44dnvutifziopmxtoyfbdfnydomjtfcdvqcn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015833220.960, "dur": 45.007, + "args": { + "External id": 2938216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015833274.080, "dur": 35.587, + "args": { + "External id": 2938217,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015833330.354, "dur": 28.296, + "args": { + "External id": 2938218,"kernel_hash": "c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/6g/c6gyydlfzfqr6lka5ahac7dks6f6oy3qw4ddnq3ec6yt47qgi4sn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015833364.789, "dur": 34.522, + "args": { + "External id": 2938219,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336755, "tid": 1336755, + "ts": 1555015833419.098, "dur": 17.023, + "args": { + "External id": 2938220,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.27)", "pid": 1336755, "tid": 1336755, + "ts": 1555015833579.700, "dur": 34.026, + "args": { + "External id": 2938221,"Record function id": 0, "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015833744.091, "dur": 313.949, + "args": { + "External id": 2938222,"Sequence number": 29294617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8197 + } + }, + { + "ph": "s", "id": 171, "pid": 1336755, "tid": 1336755, "ts": 1555015833744.091, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015833773.623, "dur": 7.593, + "args": { + "External id": 2938223,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015833775.482, "dur": 5.423, + "args": { + "External id": 2938224,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015833790.755, "dur": 10.734, + "args": { + "External id": 2938225,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015833793.644, "dur": 7.220, + "args": { + "External id": 2938226,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015833809.850, "dur": 4.861, + "args": { + "External id": 2938227,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834034.472, "dur": 7.224, + "args": { + "External id": 2938228,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834037.207, "dur": 4.065, + "args": { + "External id": 2938229,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015834086.324, "dur": 153.712, + "args": { + "External id": 2938230,"Sequence number": 29294618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015834089.444, "dur": 14.693, + "args": { + "External id": 2938231,"Sequence number": 29294618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8206 + } + }, + { + "ph": "s", "id": 170, "pid": 1336755, "tid": 1336755, "ts": 1555015834089.444, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015834095.226, "dur": 7.266, + "args": { + "External id": 2938232,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015834100.157, "dur": 1.995, + "args": { + "External id": 2938233,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015834106.549, "dur": 133.122, + "args": { + "External id": 2938234,"Sequence number": 29294619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834109.601, "dur": 4.916, + "args": { + "External id": 2938235,"Sequence number": 29294619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834110.293, "dur": 4.071, + "args": { + "External id": 2938236,"Sequence number": 29294619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8211 + } + }, + { + "ph": "s", "id": 169, "pid": 1336755, "tid": 1336755, "ts": 1555015834110.293, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015834118.316, "dur": 109.858, + "args": { + "External id": 2938237,"Sequence number": 29294620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8212 + } + }, + { + "ph": "s", "id": 168, "pid": 1336755, "tid": 1336755, "ts": 1555015834118.316, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834233.379, "dur": 5.201, + "args": { + "External id": 2938238,"Sequence number": 29294621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8213 + } + }, + { + "ph": "s", "id": 167, "pid": 1336755, "tid": 1336755, "ts": 1555015834233.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015834252.619, "dur": 69.385, + "args": { + "External id": 2938239,"Sequence number": 29294622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015834253.430, "dur": 7.956, + "args": { + "External id": 2938240,"Sequence number": 29294622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8215 + } + }, + { + "ph": "s", "id": 166, "pid": 1336755, "tid": 1336755, "ts": 1555015834253.430, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015834255.517, "dur": 4.590, + "args": { + "External id": 2938241,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015834258.785, "dur": 1.078, + "args": { + "External id": 2938242,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015834262.248, "dur": 59.500, + "args": { + "External id": 2938243,"Sequence number": 29294623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834263.245, "dur": 5.875, + "args": { + "External id": 2938244,"Sequence number": 29294623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834264.320, "dur": 4.634, + "args": { + "External id": 2938245,"Sequence number": 29294623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8220 + } + }, + { + "ph": "s", "id": 165, "pid": 1336755, "tid": 1336755, "ts": 1555015834264.320, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015834269.856, "dur": 45.935, + "args": { + "External id": 2938246,"Sequence number": 29294624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8221 + } + }, + { + "ph": "s", "id": 164, "pid": 1336755, "tid": 1336755, "ts": 1555015834269.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834317.640, "dur": 3.680, + "args": { + "External id": 2938247,"Sequence number": 29294625, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8222 + } + }, + { + "ph": "s", "id": 163, "pid": 1336755, "tid": 1336755, "ts": 1555015834317.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015834330.342, "dur": 61.762, + "args": { + "External id": 2938248,"Sequence number": 29294626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015834330.978, "dur": 5.187, + "args": { + "External id": 2938249,"Sequence number": 29294626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8224 + } + }, + { + "ph": "s", "id": 162, "pid": 1336755, "tid": 1336755, "ts": 1555015834330.978, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015834332.624, "dur": 2.423, + "args": { + "External id": 2938250,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015834334.294, "dur": 0.612, + "args": { + "External id": 2938251,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015834338.889, "dur": 53.026, + "args": { + "External id": 2938252,"Sequence number": 29294627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834339.868, "dur": 3.992, + "args": { + "External id": 2938253,"Sequence number": 29294627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834340.556, "dur": 3.163, + "args": { + "External id": 2938254,"Sequence number": 29294627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8229 + } + }, + { + "ph": "s", "id": 161, "pid": 1336755, "tid": 1336755, "ts": 1555015834340.556, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015834344.332, "dur": 40.677, + "args": { + "External id": 2938255,"Sequence number": 29294628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8230 + } + }, + { + "ph": "s", "id": 160, "pid": 1336755, "tid": 1336755, "ts": 1555015834344.332, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834386.625, "dur": 4.908, + "args": { + "External id": 2938256,"Sequence number": 29294629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8231 + } + }, + { + "ph": "s", "id": 159, "pid": 1336755, "tid": 1336755, "ts": 1555015834386.625, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834415.447, "dur": 3.816, + "args": { + "External id": 2938257,"Sequence number": 29294630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834416.470, "dur": 2.663, + "args": { + "External id": 2938258,"Sequence number": 29294630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8233 + } + }, + { + "ph": "s", "id": 158, "pid": 1336755, "tid": 1336755, "ts": 1555015834416.470, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834426.476, "dur": 3.114, + "args": { + "External id": 2938259,"Sequence number": 29294631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834427.235, "dur": 2.227, + "args": { + "External id": 2938260,"Sequence number": 29294631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8235 + } + }, + { + "ph": "s", "id": 157, "pid": 1336755, "tid": 1336755, "ts": 1555015834427.235, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834435.432, "dur": 4.076, + "args": { + "External id": 2938261,"Sequence number": 29294632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015834436.437, "dur": 2.913, + "args": { + "External id": 2938262,"Sequence number": 29294632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8237 + } + }, + { + "ph": "s", "id": 156, "pid": 1336755, "tid": 1336755, "ts": 1555015834436.437, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015834475.113, "dur": 173.507, + "args": { + "External id": 2938263,"Sequence number": 29294633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8238 + } + }, + { + "ph": "s", "id": 155, "pid": 1336755, "tid": 1336755, "ts": 1555015834475.113, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015834495.697, "dur": 8.671, + "args": { + "External id": 2938264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015834498.578, "dur": 5.178, + "args": { + "External id": 2938265,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015834661.887, "dur": 106.585, + "args": { + "External id": 2938266,"Sequence number": 29294634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "s", "id": 154, "pid": 1336755, "tid": 1336755, "ts": 1555015834661.887, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015834676.233, "dur": 6.430, + "args": { + "External id": 2938267,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015834678.262, "dur": 3.966, + "args": { + "External id": 2938268,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336755, "tid": 1336755, + "ts": 1555015834797.808, "dur": 178.348, + "args": { + "External id": 2938269,"Sequence number": 29294635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8244 + } + }, + { + "ph": "s", "id": 153, "pid": 1336755, "tid": 1336755, "ts": 1555015834797.808, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015834825.385, "dur": 122.664, + "args": { + "External id": 2938270,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015834874.058, "dur": 6.431, + "args": { + "External id": 2938271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015834876.111, "dur": 3.851, + "args": { + "External id": 2938272,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015834882.862, "dur": 4.069, + "args": { + "External id": 2938273,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015834888.641, "dur": 1.344, + "args": { + "External id": 2938274,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015834894.071, "dur": 2.850, + "args": { + "External id": 2938275,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1336755, + "ts": 1555015834962.182, "dur": 4.595, + "args": { + "External id": 2938276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015834981.119, "dur": 46.378, + "args": { + "External id": 2938277,"Sequence number": 29294636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835021.912, "dur": 5.295, + "args": { + "External id": 2938278,"Sequence number": 29294636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8253 + } + }, + { + "ph": "s", "id": 152, "pid": 1336755, "tid": 1336755, "ts": 1555015835021.912, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015835045.892, "dur": 129.728, + "args": { + "External id": 2938279,"Sequence number": 29294637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015835048.872, "dur": 8.278, + "args": { + "External id": 2938280,"Sequence number": 29294637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8255 + } + }, + { + "ph": "s", "id": 151, "pid": 1336755, "tid": 1336755, "ts": 1555015835048.872, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015835051.770, "dur": 4.267, + "args": { + "External id": 2938281,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015835054.098, "dur": 1.661, + "args": { + "External id": 2938282,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015835058.494, "dur": 116.797, + "args": { + "External id": 2938283,"Sequence number": 29294638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835060.658, "dur": 7.763, + "args": { + "External id": 2938284,"Sequence number": 29294638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835063.098, "dur": 5.167, + "args": { + "External id": 2938285,"Sequence number": 29294638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8260 + } + }, + { + "ph": "s", "id": 150, "pid": 1336755, "tid": 1336755, "ts": 1555015835063.098, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015835069.612, "dur": 95.989, + "args": { + "External id": 2938286,"Sequence number": 29294639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8261 + } + }, + { + "ph": "s", "id": 149, "pid": 1336755, "tid": 1336755, "ts": 1555015835069.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835168.627, "dur": 5.863, + "args": { + "External id": 2938287,"Sequence number": 29294640, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8262 + } + }, + { + "ph": "s", "id": 148, "pid": 1336755, "tid": 1336755, "ts": 1555015835168.627, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015835213.308, "dur": 223.564, + "args": { + "External id": 2938288,"Sequence number": 29294641, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "s", "id": 147, "pid": 1336755, "tid": 1336755, "ts": 1555015835213.308, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835235.388, "dur": 2.712, + "args": { + "External id": 2938289,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835236.190, "dur": 1.554, + "args": { + "External id": 2938290,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015835243.440, "dur": 3.768, + "args": { + "External id": 2938291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835244.948, "dur": 2.152, + "args": { + "External id": 2938292,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835245.866, "dur": 1.125, + "args": { + "External id": 2938293,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015835254.593, "dur": 8.506, + "args": { + "External id": 2938294,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015835258.106, "dur": 4.609, + "args": { + "External id": 2938295,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015835269.138, "dur": 3.311, + "args": { + "External id": 2938296,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015835276.382, "dur": 2.731, + "args": { + "External id": 2938297,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835413.710, "dur": 3.494, + "args": { + "External id": 2938298,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835414.751, "dur": 2.269, + "args": { + "External id": 2938299,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835419.501, "dur": 2.449, + "args": { + "External id": 2938300,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835420.757, "dur": 1.060, + "args": { + "External id": 2938301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015835455.360, "dur": 100.922, + "args": { + "External id": 2938302,"Sequence number": 29294642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015835456.634, "dur": 8.005, + "args": { + "External id": 2938303,"Sequence number": 29294642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8278 + } + }, + { + "ph": "s", "id": 146, "pid": 1336755, "tid": 1336755, "ts": 1555015835456.634, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015835459.594, "dur": 3.815, + "args": { + "External id": 2938304,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015835461.488, "dur": 1.495, + "args": { + "External id": 2938305,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015835465.646, "dur": 90.389, + "args": { + "External id": 2938306,"Sequence number": 29294643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835469.148, "dur": 3.402, + "args": { + "External id": 2938307,"Sequence number": 29294643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835470.047, "dur": 2.373, + "args": { + "External id": 2938308,"Sequence number": 29294643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8283 + } + }, + { + "ph": "s", "id": 145, "pid": 1336755, "tid": 1336755, "ts": 1555015835470.047, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015835473.586, "dur": 77.136, + "args": { + "External id": 2938309,"Sequence number": 29294644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8284 + } + }, + { + "ph": "s", "id": 144, "pid": 1336755, "tid": 1336755, "ts": 1555015835473.586, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835552.621, "dur": 2.834, + "args": { + "External id": 2938310,"Sequence number": 29294645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8285 + } + }, + { + "ph": "s", "id": 143, "pid": 1336755, "tid": 1336755, "ts": 1555015835552.621, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015835563.986, "dur": 68.458, + "args": { + "External id": 2938311,"Sequence number": 29294646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015835566.407, "dur": 5.943, + "args": { + "External id": 2938312,"Sequence number": 29294646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8287 + } + }, + { + "ph": "s", "id": 142, "pid": 1336755, "tid": 1336755, "ts": 1555015835566.407, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015835568.555, "dur": 2.440, + "args": { + "External id": 2938313,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015835569.936, "dur": 0.920, + "args": { + "External id": 2938314,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015835573.050, "dur": 59.174, + "args": { + "External id": 2938315,"Sequence number": 29294647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835574.130, "dur": 6.924, + "args": { + "External id": 2938316,"Sequence number": 29294647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835576.405, "dur": 4.475, + "args": { + "External id": 2938317,"Sequence number": 29294647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8292 + } + }, + { + "ph": "s", "id": 141, "pid": 1336755, "tid": 1336755, "ts": 1555015835576.405, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015835581.532, "dur": 44.559, + "args": { + "External id": 2938318,"Sequence number": 29294648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8293 + } + }, + { + "ph": "s", "id": 140, "pid": 1336755, "tid": 1336755, "ts": 1555015835581.532, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835627.892, "dur": 3.963, + "args": { + "External id": 2938319,"Sequence number": 29294649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8294 + } + }, + { + "ph": "s", "id": 139, "pid": 1336755, "tid": 1336755, "ts": 1555015835627.892, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015835653.581, "dur": 150.679, + "args": { + "External id": 2938320,"Sequence number": 29294650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8295 + } + }, + { + "ph": "s", "id": 138, "pid": 1336755, "tid": 1336755, "ts": 1555015835653.581, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015835696.430, "dur": 3.646, + "args": { + "External id": 2938321,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015835734.019, "dur": 58.382, + "args": { + "External id": 2938322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015835734.800, "dur": 5.446, + "args": { + "External id": 2938323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015835735.888, "dur": 3.435, + "args": { + "External id": 2938324,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015835738.045, "dur": 1.114, + "args": { + "External id": 2938325,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015835740.836, "dur": 51.161, + "args": { + "External id": 2938326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835743.581, "dur": 2.113, + "args": { + "External id": 2938327,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835744.416, "dur": 1.167, + "args": { + "External id": 2938328,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015835746.229, "dur": 41.904, + "args": { + "External id": 2938329,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835790.275, "dur": 1.143, + "args": { + "External id": 2938330,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555015835813.894, "dur": 26.962, + "args": { + "External id": 2938331,"Sequence number": 29294651, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8306 + } + }, + { + "ph": "s", "id": 137, "pid": 1336755, "tid": 1336755, "ts": 1555015835813.894, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015835871.068, "dur": 224.824, + "args": { + "External id": 2938332,"Sequence number": 29294652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8307 + } + }, + { + "ph": "s", "id": 136, "pid": 1336755, "tid": 1336755, "ts": 1555015835871.068, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015835889.739, "dur": 4.615, + "args": { + "External id": 2938333,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015835892.257, "dur": 1.934, + "args": { + "External id": 2938334,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015835901.795, "dur": 6.133, + "args": { + "External id": 2938335,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015835903.601, "dur": 3.854, + "args": { + "External id": 2938336,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015835913.194, "dur": 3.693, + "args": { + "External id": 2938337,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836078.881, "dur": 4.163, + "args": { + "External id": 2938338,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836080.029, "dur": 2.719, + "args": { + "External id": 2938339,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015836113.958, "dur": 108.345, + "args": { + "External id": 2938340,"Sequence number": 29294653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015836114.893, "dur": 8.417, + "args": { + "External id": 2938341,"Sequence number": 29294653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8316 + } + }, + { + "ph": "s", "id": 135, "pid": 1336755, "tid": 1336755, "ts": 1555015836114.893, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015836118.416, "dur": 3.639, + "args": { + "External id": 2938342,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836120.462, "dur": 1.332, + "args": { + "External id": 2938343,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015836124.412, "dur": 97.523, + "args": { + "External id": 2938344,"Sequence number": 29294654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836126.139, "dur": 5.171, + "args": { + "External id": 2938345,"Sequence number": 29294654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836128.426, "dur": 2.754, + "args": { + "External id": 2938346,"Sequence number": 29294654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8321 + } + }, + { + "ph": "s", "id": 134, "pid": 1336755, "tid": 1336755, "ts": 1555015836128.426, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015836132.235, "dur": 83.000, + "args": { + "External id": 2938347,"Sequence number": 29294655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8322 + } + }, + { + "ph": "s", "id": 133, "pid": 1336755, "tid": 1336755, "ts": 1555015836132.235, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836218.476, "dur": 2.809, + "args": { + "External id": 2938348,"Sequence number": 29294656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8323 + } + }, + { + "ph": "s", "id": 132, "pid": 1336755, "tid": 1336755, "ts": 1555015836218.476, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015836231.685, "dur": 67.986, + "args": { + "External id": 2938349,"Sequence number": 29294657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015836232.346, "dur": 9.628, + "args": { + "External id": 2938350,"Sequence number": 29294657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8325 + } + }, + { + "ph": "s", "id": 131, "pid": 1336755, "tid": 1336755, "ts": 1555015836232.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015836237.955, "dur": 2.902, + "args": { + "External id": 2938351,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836239.729, "dur": 0.937, + "args": { + "External id": 2938352,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015836242.908, "dur": 56.555, + "args": { + "External id": 2938353,"Sequence number": 29294658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836244.000, "dur": 4.719, + "args": { + "External id": 2938354,"Sequence number": 29294658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836244.883, "dur": 3.711, + "args": { + "External id": 2938355,"Sequence number": 29294658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8330 + } + }, + { + "ph": "s", "id": 130, "pid": 1336755, "tid": 1336755, "ts": 1555015836244.883, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015836249.530, "dur": 45.231, + "args": { + "External id": 2938356,"Sequence number": 29294659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8331 + } + }, + { + "ph": "s", "id": 129, "pid": 1336755, "tid": 1336755, "ts": 1555015836249.530, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836296.616, "dur": 2.532, + "args": { + "External id": 2938357,"Sequence number": 29294660, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8332 + } + }, + { + "ph": "s", "id": 128, "pid": 1336755, "tid": 1336755, "ts": 1555015836296.616, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015836306.414, "dur": 62.032, + "args": { + "External id": 2938358,"Sequence number": 29294661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015836307.198, "dur": 8.282, + "args": { + "External id": 2938359,"Sequence number": 29294661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8334 + } + }, + { + "ph": "s", "id": 127, "pid": 1336755, "tid": 1336755, "ts": 1555015836307.198, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015836308.720, "dur": 5.781, + "args": { + "External id": 2938360,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836311.769, "dur": 2.360, + "args": { + "External id": 2938361,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015836316.341, "dur": 51.917, + "args": { + "External id": 2938362,"Sequence number": 29294662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836317.523, "dur": 3.867, + "args": { + "External id": 2938363,"Sequence number": 29294662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836318.298, "dur": 2.972, + "args": { + "External id": 2938364,"Sequence number": 29294662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8339 + } + }, + { + "ph": "s", "id": 126, "pid": 1336755, "tid": 1336755, "ts": 1555015836318.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015836323.597, "dur": 39.074, + "args": { + "External id": 2938365,"Sequence number": 29294663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8340 + } + }, + { + "ph": "s", "id": 125, "pid": 1336755, "tid": 1336755, "ts": 1555015836323.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836364.271, "dur": 3.680, + "args": { + "External id": 2938366,"Sequence number": 29294664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8341 + } + }, + { + "ph": "s", "id": 124, "pid": 1336755, "tid": 1336755, "ts": 1555015836364.271, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836384.295, "dur": 3.953, + "args": { + "External id": 2938367,"Sequence number": 29294665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836384.931, "dur": 3.195, + "args": { + "External id": 2938368,"Sequence number": 29294665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8343 + } + }, + { + "ph": "s", "id": 123, "pid": 1336755, "tid": 1336755, "ts": 1555015836384.931, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836420.899, "dur": 6.947, + "args": { + "External id": 2938369,"Sequence number": 29294666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836424.224, "dur": 3.499, + "args": { + "External id": 2938370,"Sequence number": 29294666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8345 + } + }, + { + "ph": "s", "id": 122, "pid": 1336755, "tid": 1336755, "ts": 1555015836424.224, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836432.743, "dur": 4.379, + "args": { + "External id": 2938371,"Sequence number": 29294667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836433.466, "dur": 3.433, + "args": { + "External id": 2938372,"Sequence number": 29294667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8347 + } + }, + { + "ph": "s", "id": 121, "pid": 1336755, "tid": 1336755, "ts": 1555015836433.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015836466.262, "dur": 155.420, + "args": { + "External id": 2938373,"Sequence number": 29294668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8348 + } + }, + { + "ph": "s", "id": 120, "pid": 1336755, "tid": 1336755, "ts": 1555015836466.262, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015836485.200, "dur": 7.739, + "args": { + "External id": 2938374,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836487.600, "dur": 4.731, + "args": { + "External id": 2938375,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015836635.699, "dur": 98.977, + "args": { + "External id": 2938376,"Sequence number": 29294669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8351 + } + }, + { + "ph": "s", "id": 119, "pid": 1336755, "tid": 1336755, "ts": 1555015836635.699, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015836648.868, "dur": 5.871, + "args": { + "External id": 2938377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836650.507, "dur": 3.750, + "args": { + "External id": 2938378,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336755, "tid": 1336755, + "ts": 1555015836760.333, "dur": 169.022, + "args": { + "External id": 2938379,"Sequence number": 29294670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8354 + } + }, + { + "ph": "s", "id": 118, "pid": 1336755, "tid": 1336755, "ts": 1555015836760.333, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015836787.968, "dur": 116.792, + "args": { + "External id": 2938380,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015836833.804, "dur": 6.388, + "args": { + "External id": 2938381,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836835.742, "dur": 4.071, + "args": { + "External id": 2938382,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015836842.707, "dur": 3.584, + "args": { + "External id": 2938383,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015836847.605, "dur": 3.202, + "args": { + "External id": 2938384,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015836853.101, "dur": 3.090, + "args": { + "External id": 2938385,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1336755, + "ts": 1555015836916.133, "dur": 4.402, + "args": { + "External id": 2938386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836933.807, "dur": 5.898, + "args": { + "External id": 2938387,"Sequence number": 29294671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836934.778, "dur": 4.783, + "args": { + "External id": 2938388,"Sequence number": 29294671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8363 + } + }, + { + "ph": "s", "id": 117, "pid": 1336755, "tid": 1336755, "ts": 1555015836934.778, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015836952.653, "dur": 138.437, + "args": { + "External id": 2938389,"Sequence number": 29294672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015836953.570, "dur": 7.369, + "args": { + "External id": 2938390,"Sequence number": 29294672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8365 + } + }, + { + "ph": "s", "id": 116, "pid": 1336755, "tid": 1336755, "ts": 1555015836953.570, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015836955.726, "dur": 4.189, + "args": { + "External id": 2938391,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015836958.093, "dur": 1.574, + "args": { + "External id": 2938392,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015836961.681, "dur": 129.004, + "args": { + "External id": 2938393,"Sequence number": 29294673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015836964.667, "dur": 4.684, + "args": { + "External id": 2938394,"Sequence number": 29294673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015836965.338, "dur": 3.844, + "args": { + "External id": 2938395,"Sequence number": 29294673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8370 + } + }, + { + "ph": "s", "id": 115, "pid": 1336755, "tid": 1336755, "ts": 1555015836965.338, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015836970.114, "dur": 110.690, + "args": { + "External id": 2938396,"Sequence number": 29294674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8371 + } + }, + { + "ph": "s", "id": 114, "pid": 1336755, "tid": 1336755, "ts": 1555015836970.114, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837084.438, "dur": 5.405, + "args": { + "External id": 2938397,"Sequence number": 29294675, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8372 + } + }, + { + "ph": "s", "id": 113, "pid": 1336755, "tid": 1336755, "ts": 1555015837084.438, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015837125.819, "dur": 228.700, + "args": { + "External id": 2938398,"Sequence number": 29294676, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8373 + } + }, + { + "ph": "s", "id": 112, "pid": 1336755, "tid": 1336755, "ts": 1555015837125.819, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837163.737, "dur": 3.427, + "args": { + "External id": 2938399,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837164.485, "dur": 2.250, + "args": { + "External id": 2938400,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015837171.148, "dur": 4.687, + "args": { + "External id": 2938401,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837172.113, "dur": 3.616, + "args": { + "External id": 2938402,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837174.448, "dur": 1.159, + "args": { + "External id": 2938403,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015837182.976, "dur": 7.976, + "args": { + "External id": 2938404,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015837185.080, "dur": 5.472, + "args": { + "External id": 2938405,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015837196.935, "dur": 3.678, + "args": { + "External id": 2938406,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015837204.062, "dur": 3.538, + "args": { + "External id": 2938407,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837330.841, "dur": 2.894, + "args": { + "External id": 2938408,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837331.624, "dur": 1.880, + "args": { + "External id": 2938409,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837338.594, "dur": 2.198, + "args": { + "External id": 2938410,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837339.451, "dur": 1.230, + "args": { + "External id": 2938411,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015837372.508, "dur": 89.479, + "args": { + "External id": 2938412,"Sequence number": 29294677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015837373.757, "dur": 6.953, + "args": { + "External id": 2938413,"Sequence number": 29294677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8388 + } + }, + { + "ph": "s", "id": 111, "pid": 1336755, "tid": 1336755, "ts": 1555015837373.757, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015837375.788, "dur": 3.799, + "args": { + "External id": 2938414,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015837377.606, "dur": 1.707, + "args": { + "External id": 2938415,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015837383.174, "dur": 78.499, + "args": { + "External id": 2938416,"Sequence number": 29294678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837384.946, "dur": 2.821, + "args": { + "External id": 2938417,"Sequence number": 29294678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837385.367, "dur": 2.263, + "args": { + "External id": 2938418,"Sequence number": 29294678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8393 + } + }, + { + "ph": "s", "id": 110, "pid": 1336755, "tid": 1336755, "ts": 1555015837385.367, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015837388.474, "dur": 66.866, + "args": { + "External id": 2938419,"Sequence number": 29294679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8394 + } + }, + { + "ph": "s", "id": 109, "pid": 1336755, "tid": 1336755, "ts": 1555015837388.474, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837457.103, "dur": 3.946, + "args": { + "External id": 2938420,"Sequence number": 29294680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8395 + } + }, + { + "ph": "s", "id": 108, "pid": 1336755, "tid": 1336755, "ts": 1555015837457.103, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015837469.196, "dur": 65.389, + "args": { + "External id": 2938421,"Sequence number": 29294681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015837470.175, "dur": 5.306, + "args": { + "External id": 2938422,"Sequence number": 29294681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8397 + } + }, + { + "ph": "s", "id": 107, "pid": 1336755, "tid": 1336755, "ts": 1555015837470.175, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015837471.659, "dur": 2.812, + "args": { + "External id": 2938423,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015837473.379, "dur": 0.931, + "args": { + "External id": 2938424,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015837476.105, "dur": 58.226, + "args": { + "External id": 2938425,"Sequence number": 29294682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837478.302, "dur": 5.859, + "args": { + "External id": 2938426,"Sequence number": 29294682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837479.202, "dur": 4.817, + "args": { + "External id": 2938427,"Sequence number": 29294682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8402 + } + }, + { + "ph": "s", "id": 106, "pid": 1336755, "tid": 1336755, "ts": 1555015837479.202, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015837484.659, "dur": 44.371, + "args": { + "External id": 2938428,"Sequence number": 29294683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8403 + } + }, + { + "ph": "s", "id": 105, "pid": 1336755, "tid": 1336755, "ts": 1555015837484.659, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837530.680, "dur": 3.299, + "args": { + "External id": 2938429,"Sequence number": 29294684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8404 + } + }, + { + "ph": "s", "id": 104, "pid": 1336755, "tid": 1336755, "ts": 1555015837530.680, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015837556.075, "dur": 133.974, + "args": { + "External id": 2938430,"Sequence number": 29294685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8405 + } + }, + { + "ph": "s", "id": 103, "pid": 1336755, "tid": 1336755, "ts": 1555015837556.075, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015837590.143, "dur": 3.690, + "args": { + "External id": 2938431,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015837622.713, "dur": 55.901, + "args": { + "External id": 2938432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015837623.238, "dur": 5.064, + "args": { + "External id": 2938433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015837624.293, "dur": 3.219, + "args": { + "External id": 2938434,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015837625.796, "dur": 1.382, + "args": { + "External id": 2938435,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015837630.294, "dur": 48.061, + "args": { + "External id": 2938436,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837631.639, "dur": 1.942, + "args": { + "External id": 2938437,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837632.425, "dur": 1.042, + "args": { + "External id": 2938438,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015837634.097, "dur": 40.718, + "args": { + "External id": 2938439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837676.728, "dur": 1.061, + "args": { + "External id": 2938440,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555015837697.616, "dur": 22.814, + "args": { + "External id": 2938441,"Sequence number": 29294686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8416 + } + }, + { + "ph": "s", "id": 102, "pid": 1336755, "tid": 1336755, "ts": 1555015837697.616, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015837749.387, "dur": 163.450, + "args": { + "External id": 2938442,"Sequence number": 29294687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8417 + } + }, + { + "ph": "s", "id": 101, "pid": 1336755, "tid": 1336755, "ts": 1555015837749.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837766.744, "dur": 3.405, + "args": { + "External id": 2938443,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837767.746, "dur": 2.086, + "args": { + "External id": 2938444,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015837776.384, "dur": 6.110, + "args": { + "External id": 2938445,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015837778.611, "dur": 3.491, + "args": { + "External id": 2938446,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015837787.141, "dur": 2.963, + "args": { + "External id": 2938447,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837897.946, "dur": 2.865, + "args": { + "External id": 2938448,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837899.077, "dur": 1.461, + "args": { + "External id": 2938449,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015837930.715, "dur": 131.139, + "args": { + "External id": 2938450,"Sequence number": 29294688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015837931.643, "dur": 6.114, + "args": { + "External id": 2938451,"Sequence number": 29294688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8426 + } + }, + { + "ph": "s", "id": 100, "pid": 1336755, "tid": 1336755, "ts": 1555015837931.643, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015837933.512, "dur": 3.083, + "args": { + "External id": 2938452,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015837935.077, "dur": 1.197, + "args": { + "External id": 2938453,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015837938.418, "dur": 123.156, + "args": { + "External id": 2938454,"Sequence number": 29294689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015837941.301, "dur": 3.960, + "args": { + "External id": 2938455,"Sequence number": 29294689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015837942.661, "dur": 2.409, + "args": { + "External id": 2938456,"Sequence number": 29294689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8431 + } + }, + { + "ph": "s", "id": 99, "pid": 1336755, "tid": 1336755, "ts": 1555015837942.661, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015837945.758, "dur": 106.656, + "args": { + "External id": 2938457,"Sequence number": 29294690, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8432 + } + }, + { + "ph": "s", "id": 98, "pid": 1336755, "tid": 1336755, "ts": 1555015837945.758, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838056.141, "dur": 4.632, + "args": { + "External id": 2938458,"Sequence number": 29294691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8433 + } + }, + { + "ph": "s", "id": 97, "pid": 1336755, "tid": 1336755, "ts": 1555015838056.141, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015838072.453, "dur": 66.865, + "args": { + "External id": 2938459,"Sequence number": 29294692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015838075.233, "dur": 5.507, + "args": { + "External id": 2938460,"Sequence number": 29294692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8435 + } + }, + { + "ph": "s", "id": 96, "pid": 1336755, "tid": 1336755, "ts": 1555015838075.233, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015838076.997, "dur": 2.614, + "args": { + "External id": 2938461,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838078.356, "dur": 1.102, + "args": { + "External id": 2938462,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015838081.523, "dur": 57.554, + "args": { + "External id": 2938463,"Sequence number": 29294693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838082.793, "dur": 4.913, + "args": { + "External id": 2938464,"Sequence number": 29294693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838085.374, "dur": 2.228, + "args": { + "External id": 2938465,"Sequence number": 29294693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8440 + } + }, + { + "ph": "s", "id": 95, "pid": 1336755, "tid": 1336755, "ts": 1555015838085.374, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015838088.379, "dur": 47.102, + "args": { + "External id": 2938466,"Sequence number": 29294694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8441 + } + }, + { + "ph": "s", "id": 94, "pid": 1336755, "tid": 1336755, "ts": 1555015838088.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838137.197, "dur": 1.537, + "args": { + "External id": 2938467,"Sequence number": 29294695, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8442 + } + }, + { + "ph": "s", "id": 93, "pid": 1336755, "tid": 1336755, "ts": 1555015838137.197, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015838163.601, "dur": 68.249, + "args": { + "External id": 2938468,"Sequence number": 29294696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015838164.061, "dur": 9.903, + "args": { + "External id": 2938469,"Sequence number": 29294696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8444 + } + }, + { + "ph": "s", "id": 92, "pid": 1336755, "tid": 1336755, "ts": 1555015838164.061, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015838167.268, "dur": 5.175, + "args": { + "External id": 2938470,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838169.310, "dur": 2.764, + "args": { + "External id": 2938471,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015838174.600, "dur": 57.037, + "args": { + "External id": 2938472,"Sequence number": 29294697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838175.527, "dur": 6.100, + "args": { + "External id": 2938473,"Sequence number": 29294697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838177.680, "dur": 3.500, + "args": { + "External id": 2938474,"Sequence number": 29294697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8449 + } + }, + { + "ph": "s", "id": 91, "pid": 1336755, "tid": 1336755, "ts": 1555015838177.680, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015838182.342, "dur": 43.622, + "args": { + "External id": 2938475,"Sequence number": 29294698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8450 + } + }, + { + "ph": "s", "id": 90, "pid": 1336755, "tid": 1336755, "ts": 1555015838182.342, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838227.741, "dur": 3.452, + "args": { + "External id": 2938476,"Sequence number": 29294699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8451 + } + }, + { + "ph": "s", "id": 89, "pid": 1336755, "tid": 1336755, "ts": 1555015838227.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838247.218, "dur": 4.723, + "args": { + "External id": 2938477,"Sequence number": 29294700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838247.959, "dur": 3.861, + "args": { + "External id": 2938478,"Sequence number": 29294700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8453 + } + }, + { + "ph": "s", "id": 88, "pid": 1336755, "tid": 1336755, "ts": 1555015838247.959, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838258.222, "dur": 2.864, + "args": { + "External id": 2938479,"Sequence number": 29294701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838259.179, "dur": 1.789, + "args": { + "External id": 2938480,"Sequence number": 29294701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8455 + } + }, + { + "ph": "s", "id": 87, "pid": 1336755, "tid": 1336755, "ts": 1555015838259.179, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838265.214, "dur": 2.912, + "args": { + "External id": 2938481,"Sequence number": 29294702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838266.148, "dur": 1.866, + "args": { + "External id": 2938482,"Sequence number": 29294702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8457 + } + }, + { + "ph": "s", "id": 86, "pid": 1336755, "tid": 1336755, "ts": 1555015838266.148, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015838296.731, "dur": 149.901, + "args": { + "External id": 2938483,"Sequence number": 29294703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "s", "id": 85, "pid": 1336755, "tid": 1336755, "ts": 1555015838296.731, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015838316.529, "dur": 9.624, + "args": { + "External id": 2938484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838319.178, "dur": 6.374, + "args": { + "External id": 2938485,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015838458.941, "dur": 103.135, + "args": { + "External id": 2938486,"Sequence number": 29294704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "s", "id": 84, "pid": 1336755, "tid": 1336755, "ts": 1555015838458.941, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015838472.032, "dur": 6.423, + "args": { + "External id": 2938487,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838474.133, "dur": 3.950, + "args": { + "External id": 2938488,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336755, "tid": 1336755, + "ts": 1555015838589.426, "dur": 166.037, + "args": { + "External id": 2938489,"Sequence number": 29294705, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8464 + } + }, + { + "ph": "s", "id": 83, "pid": 1336755, "tid": 1336755, "ts": 1555015838589.426, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015838613.944, "dur": 116.896, + "args": { + "External id": 2938490,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015838660.972, "dur": 6.294, + "args": { + "External id": 2938491,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838662.989, "dur": 3.948, + "args": { + "External id": 2938492,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015838669.753, "dur": 3.748, + "args": { + "External id": 2938493,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015838674.583, "dur": 1.248, + "args": { + "External id": 2938494,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015838678.898, "dur": 2.862, + "args": { + "External id": 2938495,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1336755, + "ts": 1555015838742.509, "dur": 4.160, + "args": { + "External id": 2938496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838760.304, "dur": 5.062, + "args": { + "External id": 2938497,"Sequence number": 29294706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838761.699, "dur": 3.531, + "args": { + "External id": 2938498,"Sequence number": 29294706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8473 + } + }, + { + "ph": "s", "id": 82, "pid": 1336755, "tid": 1336755, "ts": 1555015838761.699, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015838776.973, "dur": 105.446, + "args": { + "External id": 2938499,"Sequence number": 29294707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015838778.368, "dur": 10.033, + "args": { + "External id": 2938500,"Sequence number": 29294707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8475 + } + }, + { + "ph": "s", "id": 81, "pid": 1336755, "tid": 1336755, "ts": 1555015838778.368, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015838782.805, "dur": 4.535, + "args": { + "External id": 2938501,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838786.111, "dur": 0.974, + "args": { + "External id": 2938502,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015838789.555, "dur": 92.541, + "args": { + "External id": 2938503,"Sequence number": 29294708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838791.428, "dur": 2.460, + "args": { + "External id": 2938504,"Sequence number": 29294708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838791.947, "dur": 1.781, + "args": { + "External id": 2938505,"Sequence number": 29294708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8480 + } + }, + { + "ph": "s", "id": 80, "pid": 1336755, "tid": 1336755, "ts": 1555015838791.947, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015838794.585, "dur": 80.163, + "args": { + "External id": 2938506,"Sequence number": 29294709, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8481 + } + }, + { + "ph": "s", "id": 79, "pid": 1336755, "tid": 1336755, "ts": 1555015838794.585, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838876.900, "dur": 4.456, + "args": { + "External id": 2938507,"Sequence number": 29294710, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8482 + } + }, + { + "ph": "s", "id": 78, "pid": 1336755, "tid": 1336755, "ts": 1555015838876.900, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015838912.564, "dur": 262.517, + "args": { + "External id": 2938508,"Sequence number": 29294711, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "s", "id": 77, "pid": 1336755, "tid": 1336755, "ts": 1555015838912.564, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838928.561, "dur": 2.307, + "args": { + "External id": 2938509,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838929.179, "dur": 1.552, + "args": { + "External id": 2938510,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015838934.715, "dur": 5.099, + "args": { + "External id": 2938511,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015838937.525, "dur": 2.184, + "args": { + "External id": 2938512,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015838938.615, "dur": 1.005, + "args": { + "External id": 2938513,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015838946.758, "dur": 6.390, + "args": { + "External id": 2938514,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015838948.814, "dur": 4.021, + "args": { + "External id": 2938515,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015838958.794, "dur": 3.886, + "args": { + "External id": 2938516,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015838966.066, "dur": 3.159, + "args": { + "External id": 2938517,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839134.506, "dur": 4.406, + "args": { + "External id": 2938518,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839135.578, "dur": 2.996, + "args": { + "External id": 2938519,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839156.767, "dur": 3.359, + "args": { + "External id": 2938520,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839158.400, "dur": 1.472, + "args": { + "External id": 2938521,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015839194.460, "dur": 114.571, + "args": { + "External id": 2938522,"Sequence number": 29294712, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015839204.448, "dur": 8.713, + "args": { + "External id": 2938523,"Sequence number": 29294712, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8498 + } + }, + { + "ph": "s", "id": 76, "pid": 1336755, "tid": 1336755, "ts": 1555015839204.448, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015839208.246, "dur": 3.874, + "args": { + "External id": 2938524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839210.352, "dur": 1.477, + "args": { + "External id": 2938525,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015839213.998, "dur": 94.595, + "args": { + "External id": 2938526,"Sequence number": 29294713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839215.402, "dur": 6.925, + "args": { + "External id": 2938527,"Sequence number": 29294713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839216.518, "dur": 5.576, + "args": { + "External id": 2938528,"Sequence number": 29294713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8503 + } + }, + { + "ph": "s", "id": 75, "pid": 1336755, "tid": 1336755, "ts": 1555015839216.518, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015839223.110, "dur": 75.876, + "args": { + "External id": 2938529,"Sequence number": 29294714, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8504 + } + }, + { + "ph": "s", "id": 74, "pid": 1336755, "tid": 1336755, "ts": 1555015839223.110, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839301.020, "dur": 6.962, + "args": { + "External id": 2938530,"Sequence number": 29294715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8505 + } + }, + { + "ph": "s", "id": 73, "pid": 1336755, "tid": 1336755, "ts": 1555015839301.020, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015839317.427, "dur": 68.198, + "args": { + "External id": 2938531,"Sequence number": 29294716, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015839318.157, "dur": 7.419, + "args": { + "External id": 2938532,"Sequence number": 29294716, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8507 + } + }, + { + "ph": "s", "id": 72, "pid": 1336755, "tid": 1336755, "ts": 1555015839318.157, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015839319.822, "dur": 4.480, + "args": { + "External id": 2938533,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839323.153, "dur": 1.015, + "args": { + "External id": 2938534,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015839326.321, "dur": 59.102, + "args": { + "External id": 2938535,"Sequence number": 29294717, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839327.933, "dur": 4.783, + "args": { + "External id": 2938536,"Sequence number": 29294717, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839328.740, "dur": 3.842, + "args": { + "External id": 2938537,"Sequence number": 29294717, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8512 + } + }, + { + "ph": "s", "id": 71, "pid": 1336755, "tid": 1336755, "ts": 1555015839328.740, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015839334.851, "dur": 46.828, + "args": { + "External id": 2938538,"Sequence number": 29294718, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8513 + } + }, + { + "ph": "s", "id": 70, "pid": 1336755, "tid": 1336755, "ts": 1555015839334.851, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839383.281, "dur": 1.778, + "args": { + "External id": 2938539,"Sequence number": 29294719, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8514 + } + }, + { + "ph": "s", "id": 69, "pid": 1336755, "tid": 1336755, "ts": 1555015839383.281, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015839405.303, "dur": 161.202, + "args": { + "External id": 2938540,"Sequence number": 29294720, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8515 + } + }, + { + "ph": "s", "id": 68, "pid": 1336755, "tid": 1336755, "ts": 1555015839405.303, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015839440.432, "dur": 4.181, + "args": { + "External id": 2938541,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015839473.996, "dur": 80.472, + "args": { + "External id": 2938542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015839474.732, "dur": 4.298, + "args": { + "External id": 2938543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015839475.698, "dur": 2.513, + "args": { + "External id": 2938544,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839477.410, "dur": 0.608, + "args": { + "External id": 2938545,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015839479.745, "dur": 74.404, + "args": { + "External id": 2938546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839480.919, "dur": 2.336, + "args": { + "External id": 2938547,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839481.970, "dur": 1.164, + "args": { + "External id": 2938548,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015839485.226, "dur": 65.356, + "args": { + "External id": 2938549,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839552.280, "dur": 1.323, + "args": { + "External id": 2938550,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555015839573.834, "dur": 23.336, + "args": { + "External id": 2938551,"Sequence number": 29294721, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8526 + } + }, + { + "ph": "s", "id": 67, "pid": 1336755, "tid": 1336755, "ts": 1555015839573.834, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015839627.219, "dur": 172.336, + "args": { + "External id": 2938552,"Sequence number": 29294722, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8527 + } + }, + { + "ph": "s", "id": 66, "pid": 1336755, "tid": 1336755, "ts": 1555015839627.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839644.319, "dur": 2.266, + "args": { + "External id": 2938553,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839644.949, "dur": 1.446, + "args": { + "External id": 2938554,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015839656.248, "dur": 8.071, + "args": { + "External id": 2938555,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839659.533, "dur": 4.348, + "args": { + "External id": 2938556,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015839673.231, "dur": 5.575, + "args": { + "External id": 2938557,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839788.054, "dur": 3.111, + "args": { + "External id": 2938558,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839789.180, "dur": 1.709, + "args": { + "External id": 2938559,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015839817.527, "dur": 80.584, + "args": { + "External id": 2938560,"Sequence number": 29294723, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015839818.785, "dur": 7.803, + "args": { + "External id": 2938561,"Sequence number": 29294723, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8536 + } + }, + { + "ph": "s", "id": 65, "pid": 1336755, "tid": 1336755, "ts": 1555015839818.785, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015839820.770, "dur": 4.785, + "args": { + "External id": 2938562,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839824.129, "dur": 1.190, + "args": { + "External id": 2938563,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015839827.316, "dur": 70.529, + "args": { + "External id": 2938564,"Sequence number": 29294724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839828.653, "dur": 3.137, + "args": { + "External id": 2938565,"Sequence number": 29294724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839829.387, "dur": 2.278, + "args": { + "External id": 2938566,"Sequence number": 29294724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8541 + } + }, + { + "ph": "s", "id": 64, "pid": 1336755, "tid": 1336755, "ts": 1555015839829.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015839833.957, "dur": 58.684, + "args": { + "External id": 2938567,"Sequence number": 29294725, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8542 + } + }, + { + "ph": "s", "id": 63, "pid": 1336755, "tid": 1336755, "ts": 1555015839833.957, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839894.433, "dur": 2.823, + "args": { + "External id": 2938568,"Sequence number": 29294726, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8543 + } + }, + { + "ph": "s", "id": 62, "pid": 1336755, "tid": 1336755, "ts": 1555015839894.433, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015839906.553, "dur": 63.393, + "args": { + "External id": 2938569,"Sequence number": 29294727, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015839907.106, "dur": 8.590, + "args": { + "External id": 2938570,"Sequence number": 29294727, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8545 + } + }, + { + "ph": "s", "id": 61, "pid": 1336755, "tid": 1336755, "ts": 1555015839907.106, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015839908.933, "dur": 5.737, + "args": { + "External id": 2938571,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839912.050, "dur": 2.360, + "args": { + "External id": 2938572,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015839916.264, "dur": 53.395, + "args": { + "External id": 2938573,"Sequence number": 29294728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015839917.165, "dur": 3.126, + "args": { + "External id": 2938574,"Sequence number": 29294728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839917.897, "dur": 2.227, + "args": { + "External id": 2938575,"Sequence number": 29294728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8550 + } + }, + { + "ph": "s", "id": 60, "pid": 1336755, "tid": 1336755, "ts": 1555015839917.897, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015839920.886, "dur": 41.206, + "args": { + "External id": 2938576,"Sequence number": 29294729, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8551 + } + }, + { + "ph": "s", "id": 59, "pid": 1336755, "tid": 1336755, "ts": 1555015839920.886, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015839963.633, "dur": 5.647, + "args": { + "External id": 2938577,"Sequence number": 29294730, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8552 + } + }, + { + "ph": "s", "id": 58, "pid": 1336755, "tid": 1336755, "ts": 1555015839963.633, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015839976.421, "dur": 104.626, + "args": { + "External id": 2938578,"Sequence number": 29294731, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015839977.141, "dur": 5.015, + "args": { + "External id": 2938579,"Sequence number": 29294731, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8554 + } + }, + { + "ph": "s", "id": 57, "pid": 1336755, "tid": 1336755, "ts": 1555015839977.141, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015839978.710, "dur": 2.270, + "args": { + "External id": 2938580,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015839980.114, "dur": 0.735, + "args": { + "External id": 2938581,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015839984.267, "dur": 96.489, + "args": { + "External id": 2938582,"Sequence number": 29294732, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840022.678, "dur": 5.491, + "args": { + "External id": 2938583,"Sequence number": 29294732, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840023.247, "dur": 4.673, + "args": { + "External id": 2938584,"Sequence number": 29294732, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8559 + } + }, + { + "ph": "s", "id": 56, "pid": 1336755, "tid": 1336755, "ts": 1555015840023.247, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015840028.920, "dur": 46.122, + "args": { + "External id": 2938585,"Sequence number": 29294733, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8560 + } + }, + { + "ph": "s", "id": 55, "pid": 1336755, "tid": 1336755, "ts": 1555015840028.920, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840076.466, "dur": 3.782, + "args": { + "External id": 2938586,"Sequence number": 29294734, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8561 + } + }, + { + "ph": "s", "id": 54, "pid": 1336755, "tid": 1336755, "ts": 1555015840076.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840097.818, "dur": 3.528, + "args": { + "External id": 2938587,"Sequence number": 29294735, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840098.513, "dur": 2.707, + "args": { + "External id": 2938588,"Sequence number": 29294735, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8563 + } + }, + { + "ph": "s", "id": 53, "pid": 1336755, "tid": 1336755, "ts": 1555015840098.513, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840107.804, "dur": 3.136, + "args": { + "External id": 2938589,"Sequence number": 29294736, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840108.813, "dur": 1.999, + "args": { + "External id": 2938590,"Sequence number": 29294736, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8565 + } + }, + { + "ph": "s", "id": 52, "pid": 1336755, "tid": 1336755, "ts": 1555015840108.813, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840116.928, "dur": 4.712, + "args": { + "External id": 2938591,"Sequence number": 29294737, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840117.949, "dur": 3.536, + "args": { + "External id": 2938592,"Sequence number": 29294737, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8567 + } + }, + { + "ph": "s", "id": 51, "pid": 1336755, "tid": 1336755, "ts": 1555015840117.949, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015840165.929, "dur": 145.525, + "args": { + "External id": 2938593,"Sequence number": 29294738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "s", "id": 50, "pid": 1336755, "tid": 1336755, "ts": 1555015840165.929, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015840183.877, "dur": 8.696, + "args": { + "External id": 2938594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015840186.750, "dur": 5.145, + "args": { + "External id": 2938595,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015840323.036, "dur": 95.671, + "args": { + "External id": 2938596,"Sequence number": 29294739, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "s", "id": 49, "pid": 1336755, "tid": 1336755, "ts": 1555015840323.036, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015840335.058, "dur": 6.565, + "args": { + "External id": 2938597,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015840337.119, "dur": 4.067, + "args": { + "External id": 2938598,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336755, "tid": 1336755, + "ts": 1555015840443.128, "dur": 171.837, + "args": { + "External id": 2938599,"Sequence number": 29294740, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "s", "id": 48, "pid": 1336755, "tid": 1336755, "ts": 1555015840443.128, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555015840470.200, "dur": 117.313, + "args": { + "External id": 2938600,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015840515.556, "dur": 6.514, + "args": { + "External id": 2938601,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015840517.362, "dur": 4.343, + "args": { + "External id": 2938602,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015840524.786, "dur": 4.046, + "args": { + "External id": 2938603,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015840530.488, "dur": 2.811, + "args": { + "External id": 2938604,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015840535.669, "dur": 3.254, + "args": { + "External id": 2938605,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336755, "tid": 1336755, + "ts": 1555015840599.471, "dur": 4.515, + "args": { + "External id": 2938606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840619.836, "dur": 5.100, + "args": { + "External id": 2938607,"Sequence number": 29294741, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840621.098, "dur": 3.713, + "args": { + "External id": 2938608,"Sequence number": 29294741, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8583 + } + }, + { + "ph": "s", "id": 47, "pid": 1336755, "tid": 1336755, "ts": 1555015840621.098, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015840636.612, "dur": 98.192, + "args": { + "External id": 2938609,"Sequence number": 29294742, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015840637.968, "dur": 8.233, + "args": { + "External id": 2938610,"Sequence number": 29294742, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8585 + } + }, + { + "ph": "s", "id": 46, "pid": 1336755, "tid": 1336755, "ts": 1555015840637.968, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015840640.739, "dur": 4.280, + "args": { + "External id": 2938611,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015840643.101, "dur": 1.574, + "args": { + "External id": 2938612,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015840648.943, "dur": 85.527, + "args": { + "External id": 2938613,"Sequence number": 29294743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840650.869, "dur": 3.388, + "args": { + "External id": 2938614,"Sequence number": 29294743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840652.039, "dur": 2.082, + "args": { + "External id": 2938615,"Sequence number": 29294743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8590 + } + }, + { + "ph": "s", "id": 45, "pid": 1336755, "tid": 1336755, "ts": 1555015840652.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015840655.035, "dur": 72.855, + "args": { + "External id": 2938616,"Sequence number": 29294744, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8591 + } + }, + { + "ph": "s", "id": 44, "pid": 1336755, "tid": 1336755, "ts": 1555015840655.035, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840730.403, "dur": 3.414, + "args": { + "External id": 2938617,"Sequence number": 29294745, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8592 + } + }, + { + "ph": "s", "id": 43, "pid": 1336755, "tid": 1336755, "ts": 1555015840730.403, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015840767.304, "dur": 193.851, + "args": { + "External id": 2938618,"Sequence number": 29294746, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "s", "id": 42, "pid": 1336755, "tid": 1336755, "ts": 1555015840767.304, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840784.048, "dur": 2.938, + "args": { + "External id": 2938619,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840785.088, "dur": 1.765, + "args": { + "External id": 2938620,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336755, "tid": 1336755, + "ts": 1555015840790.199, "dur": 4.953, + "args": { + "External id": 2938621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840791.290, "dur": 3.751, + "args": { + "External id": 2938622,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840793.804, "dur": 1.116, + "args": { + "External id": 2938623,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015840802.098, "dur": 5.870, + "args": { + "External id": 2938624,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015840803.888, "dur": 3.733, + "args": { + "External id": 2938625,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015840813.358, "dur": 3.191, + "args": { + "External id": 2938626,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015840819.909, "dur": 4.235, + "args": { + "External id": 2938627,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840941.983, "dur": 3.101, + "args": { + "External id": 2938628,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840943.000, "dur": 1.883, + "args": { + "External id": 2938629,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015840948.939, "dur": 2.023, + "args": { + "External id": 2938630,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015840949.866, "dur": 1.005, + "args": { + "External id": 2938631,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015840978.593, "dur": 141.614, + "args": { + "External id": 2938632,"Sequence number": 29294747, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015840979.642, "dur": 48.796, + "args": { + "External id": 2938633,"Sequence number": 29294747, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8608 + } + }, + { + "ph": "s", "id": 41, "pid": 1336755, "tid": 1336755, "ts": 1555015840979.642, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015840981.553, "dur": 44.879, + "args": { + "External id": 2938634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015840983.398, "dur": 42.105, + "args": { + "External id": 2938635,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015841031.608, "dur": 88.292, + "args": { + "External id": 2938636,"Sequence number": 29294748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015841033.368, "dur": 3.994, + "args": { + "External id": 2938637,"Sequence number": 29294748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841034.219, "dur": 3.006, + "args": { + "External id": 2938638,"Sequence number": 29294748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8613 + } + }, + { + "ph": "s", "id": 40, "pid": 1336755, "tid": 1336755, "ts": 1555015841034.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015841038.308, "dur": 72.762, + "args": { + "External id": 2938639,"Sequence number": 29294749, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8614 + } + }, + { + "ph": "s", "id": 39, "pid": 1336755, "tid": 1336755, "ts": 1555015841038.308, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841113.200, "dur": 5.991, + "args": { + "External id": 2938640,"Sequence number": 29294750, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8615 + } + }, + { + "ph": "s", "id": 38, "pid": 1336755, "tid": 1336755, "ts": 1555015841113.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015841130.634, "dur": 92.837, + "args": { + "External id": 2938641,"Sequence number": 29294751, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015841131.355, "dur": 8.257, + "args": { + "External id": 2938642,"Sequence number": 29294751, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8617 + } + }, + { + "ph": "s", "id": 37, "pid": 1336755, "tid": 1336755, "ts": 1555015841131.355, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015841135.400, "dur": 3.232, + "args": { + "External id": 2938643,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841137.094, "dur": 1.313, + "args": { + "External id": 2938644,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015841140.363, "dur": 82.886, + "args": { + "External id": 2938645,"Sequence number": 29294752, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015841157.155, "dur": 6.355, + "args": { + "External id": 2938646,"Sequence number": 29294752, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841158.533, "dur": 4.714, + "args": { + "External id": 2938647,"Sequence number": 29294752, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8622 + } + }, + { + "ph": "s", "id": 36, "pid": 1336755, "tid": 1336755, "ts": 1555015841158.533, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015841164.078, "dur": 54.083, + "args": { + "External id": 2938648,"Sequence number": 29294753, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8623 + } + }, + { + "ph": "s", "id": 35, "pid": 1336755, "tid": 1336755, "ts": 1555015841164.078, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841219.862, "dur": 2.866, + "args": { + "External id": 2938649,"Sequence number": 29294754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8624 + } + }, + { + "ph": "s", "id": 34, "pid": 1336755, "tid": 1336755, "ts": 1555015841219.862, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015841247.512, "dur": 143.941, + "args": { + "External id": 2938650,"Sequence number": 29294755, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8625 + } + }, + { + "ph": "s", "id": 33, "pid": 1336755, "tid": 1336755, "ts": 1555015841247.512, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015841284.453, "dur": 4.269, + "args": { + "External id": 2938651,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015841318.184, "dur": 61.570, + "args": { + "External id": 2938652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015841319.189, "dur": 6.481, + "args": { + "External id": 2938653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015841320.247, "dur": 4.672, + "args": { + "External id": 2938654,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841321.980, "dur": 2.741, + "args": { + "External id": 2938655,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015841328.292, "dur": 51.213, + "args": { + "External id": 2938656,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336755, "tid": 1336755, + "ts": 1555015841329.981, "dur": 2.276, + "args": { + "External id": 2938657,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841330.564, "dur": 1.370, + "args": { + "External id": 2938658,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015841333.046, "dur": 42.435, + "args": { + "External id": 2938659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841377.647, "dur": 1.268, + "args": { + "External id": 2938660,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555015841399.521, "dur": 23.007, + "args": { + "External id": 2938661,"Sequence number": 29294756, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8636 + } + }, + { + "ph": "s", "id": 32, "pid": 1336755, "tid": 1336755, "ts": 1555015841399.521, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 1336755, "tid": 1336755, + "ts": 1555015841440.630, "dur": 37.162, + "args": { + "External id": 2938662,"Sequence number": 29294757, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1]], []], "Input Dims": [[[16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048]], []], "Ev Idx": 8637 + } + }, + { + "ph": "s", "id": 31, "pid": 1336755, "tid": 1336755, "ts": 1555015841440.630, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 1336755, "tid": 1336755, + "ts": 1555015841448.547, "dur": 25.319, + "args": { + "External id": 2938663,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1]], []], "Input Dims": [[[16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048]], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015841475.119, "dur": 1.259, + "args": { + "External id": 2938664,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 8192], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336755, "tid": 1336755, + "ts": 1555015841509.771, "dur": 33.670, + "args": { + "External id": 2938665,"Record function id": 0, "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 1336755, "tid": 1336755, + "ts": 1555015841544.452, "dur": 179.395, + "args": { + "External id": 2938666,"Record function id": 0, "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015841579.482, "dur": 136.236, + "args": { + "External id": 2938667,"Sequence number": 29294758, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [33554432, 8192, 2048, 1]], "Input Dims": [[2048], [16, 4096, 4, 2048]], "Ev Idx": 8642 + } + }, + { + "ph": "s", "id": 30, "pid": 1336755, "tid": 1336755, "ts": 1555015841579.482, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336755, "tid": 1336755, + "ts": 1555015841644.539, "dur": 36.012, + "args": { + "External id": 2938668,"kernel_hash": "cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4hlidcuq75f7buaadgxwen3jotcnqzgd2kokws4tswzqgyi45s.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[262144, 2048], [262144, 2048], [2048], [262144], [], [], [], [], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555015841805.125, "dur": 37.069, + "args": { + "External id": 2938669,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015841807.446, "dur": 4.234, + "args": { + "External id": 2938670,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015841814.447, "dur": 27.362, + "args": { + "External id": 2938671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015841817.666, "dur": 23.577, + "args": { + "External id": 2938672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555015841846.681, "dur": 16.491, + "args": { + "External id": 2938673,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015841847.374, "dur": 2.684, + "args": { + "External id": 2938674,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015841850.805, "dur": 12.091, + "args": { + "External id": 2938675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015841851.376, "dur": 11.127, + "args": { + "External id": 2938676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555015841866.155, "dur": 16.595, + "args": { + "External id": 2938677,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015841866.726, "dur": 2.574, + "args": { + "External id": 2938678,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015841869.800, "dur": 12.675, + "args": { + "External id": 2938679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015841872.092, "dur": 10.027, + "args": { + "External id": 2938680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015841892.168, "dur": 0.578, + "args": { + "External id": 2938681,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 1336755, "tid": 1336755, + "ts": 1555015841899.696, "dur": 8.686, + "args": { + "External id": 2938682,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841904.913, "dur": 1.847, + "args": { + "External id": 2938683,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841914.934, "dur": 5.847, + "args": { + "External id": 2938684,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841918.617, "dur": 0.760, + "args": { + "External id": 2938685,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841921.934, "dur": 3.040, + "args": { + "External id": 2938686,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841923.643, "dur": 0.661, + "args": { + "External id": 2938687,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841926.211, "dur": 3.264, + "args": { + "External id": 2938688,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841928.137, "dur": 0.779, + "args": { + "External id": 2938689,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841932.432, "dur": 3.267, + "args": { + "External id": 2938690,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 4], [], [], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841933.981, "dur": 1.158, + "args": { + "External id": 2938691,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 4], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841936.731, "dur": 3.264, + "args": { + "External id": 2938692,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 4], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841938.435, "dur": 0.813, + "args": { + "External id": 2938693,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 4], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841942.568, "dur": 3.150, + "args": { + "External id": 2938694,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4096, 4], [], [], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841944.281, "dur": 0.907, + "args": { + "External id": 2938695,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4096, 4], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015841949.131, "dur": 5.159, + "args": { + "External id": 2938696,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4096, 4], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841952.994, "dur": 0.571, + "args": { + "External id": 2938697,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4096, 4], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841958.277, "dur": 2.914, + "args": { + "External id": 2938698,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841959.925, "dur": 0.677, + "args": { + "External id": 2938699,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555015841964.331, "dur": 6.018, + "args": { + "External id": 2938700,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841968.241, "dur": 1.149, + "args": { + "External id": 2938701,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841971.439, "dur": 4.557, + "args": { + "External id": 2938702,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841974.752, "dur": 0.700, + "args": { + "External id": 2938703,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015841978.289, "dur": 5.160, + "args": { + "External id": 2938704,"Sequence number": 29294759, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "s", "id": 29, "pid": 1336755, "tid": 1336755, "ts": 1555015841978.289, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015841981.482, "dur": 0.648, + "args": { + "External id": 2938705,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015842025.458, "dur": 6.605, + "args": { + "External id": 2938706,"Sequence number": 29294760, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 8681 + } + }, + { + "ph": "s", "id": 28, "pid": 1336755, "tid": 1336755, "ts": 1555015842025.458, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842029.725, "dur": 0.909, + "args": { + "External id": 2938707,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555015842033.500, "dur": 6.660, + "args": { + "External id": 2938708,"Sequence number": 29294761, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "s", "id": 27, "pid": 1336755, "tid": 1336755, "ts": 1555015842033.500, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842038.236, "dur": 1.051, + "args": { + "External id": 2938709,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015842041.550, "dur": 5.550, + "args": { + "External id": 2938710,"Sequence number": 29294762, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 8685 + } + }, + { + "ph": "s", "id": 26, "pid": 1336755, "tid": 1336755, "ts": 1555015842041.550, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842044.372, "dur": 1.863, + "args": { + "External id": 2938711,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015842051.910, "dur": 36.714, + "args": { + "External id": 2938712,"Sequence number": 29294763, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015842054.082, "dur": 34.345, + "args": { + "External id": 2938713,"Sequence number": 29294763, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015842056.518, "dur": 6.141, + "args": { + "External id": 2938714,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015842058.589, "dur": 3.561, + "args": { + "External id": 2938715,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842064.248, "dur": 23.746, + "args": { + "External id": 2938716,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015842114.353, "dur": 4.334, + "args": { + "External id": 2938717,"Sequence number": 29294763, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8692 + } + }, + { + "ph": "s", "id": 25, "pid": 1336755, "tid": 1336755, "ts": 1555015842114.353, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015842120.856, "dur": 1.512, + "args": { + "External id": 2938718,"Sequence number": 29294764, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015842163.252, "dur": 71574.240, + "args": { + "External id": 2938719,"Sequence number": 29294764, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "s", "id": 24, "pid": 1336755, "tid": 1336755, "ts": 1555015842163.252, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015842178.839, "dur": 32.083, + "args": { + "External id": 2938720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015842179.677, "dur": 31.033, + "args": { + "External id": 2938721,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015842181.139, "dur": 7.724, + "args": { + "External id": 2938722,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015842182.697, "dur": 5.606, + "args": { + "External id": 2938723,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842189.606, "dur": 20.703, + "args": { + "External id": 2938724,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015842228.252, "dur": 27.059, + "args": { + "External id": 2938725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015842229.699, "dur": 7.606, + "args": { + "External id": 2938726,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842233.045, "dur": 3.954, + "args": { + "External id": 2938727,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842238.173, "dur": 16.905, + "args": { + "External id": 2938728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842239.790, "dur": 14.881, + "args": { + "External id": 2938729,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015842258.988, "dur": 27.112, + "args": { + "External id": 2938730,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015842259.839, "dur": 4.638, + "args": { + "External id": 2938731,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842261.286, "dur": 2.969, + "args": { + "External id": 2938732,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842273.849, "dur": 12.049, + "args": { + "External id": 2938733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842274.387, "dur": 11.044, + "args": { + "External id": 2938734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555015842290.854, "dur": 19.716, + "args": { + "External id": 2938735,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015842294.258, "dur": 4.742, + "args": { + "External id": 2938736,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842299.689, "dur": 10.624, + "args": { + "External id": 2938737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842300.179, "dur": 9.830, + "args": { + "External id": 2938738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1336755, + "ts": 1555015842317.224, "dur": 26.194, + "args": { + "External id": 2938739,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015842347.727, "dur": 57.913, + "args": { + "External id": 2938740,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015842351.272, "dur": 53.955, + "args": { + "External id": 2938741,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842357.353, "dur": 1.472, + "args": { + "External id": 2938742,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015842360.096, "dur": 26.427, + "args": { + "External id": 2938743,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015842361.879, "dur": 24.443, + "args": { + "External id": 2938744,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015842365.911, "dur": 2.738, + "args": { + "External id": 2938745,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015842371.229, "dur": 14.748, + "args": { + "External id": 2938746,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1336755, + "ts": 1555015842410.880, "dur": 65706.571, + "args": { + "External id": 2938747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1336755, + "ts": 1555015842412.649, "dur": 65703.741, + "args": { + "External id": 2938748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015908129.679, "dur": 7.352, + "args": { + "External id": 2938749,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908133.762, "dur": 1.209, + "args": { + "External id": 2938750,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015908153.000, "dur": 110.210, + "args": { + "External id": 2938751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015908154.640, "dur": 10.522, + "args": { + "External id": 2938752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015908156.935, "dur": 7.233, + "args": { + "External id": 2938753,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908163.004, "dur": 0.768, + "args": { + "External id": 2938754,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015908166.530, "dur": 95.846, + "args": { + "External id": 2938755,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015908168.519, "dur": 93.038, + "args": { + "External id": 2938756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015908267.493, "dur": 4.639, + "args": { + "External id": 2938757,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908269.566, "dur": 1.058, + "args": { + "External id": 2938758,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015908281.023, "dur": 3.686, + "args": { + "External id": 2938759,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015908298.221, "dur": 7.114, + "args": { + "External id": 2938760,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015908301.081, "dur": 3.972, + "args": { + "External id": 2938761,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015908443.834, "dur": 231.194, + "args": { + "External id": 2938762,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015908449.041, "dur": 3.269, + "args": { + "External id": 2938763,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015908454.870, "dur": 219.512, + "args": { + "External id": 2938764,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015908458.446, "dur": 0.678, + "args": { + "External id": 2938765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015908462.475, "dur": 29.064, + "args": { + "External id": 2938766,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015908496.003, "dur": 5.980, + "args": { + "External id": 2938767,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908500.541, "dur": 0.956, + "args": { + "External id": 2938768,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015908503.696, "dur": 24.457, + "args": { + "External id": 2938769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015908504.564, "dur": 1.455, + "args": { + "External id": 2938770,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015908508.523, "dur": 19.348, + "args": { + "External id": 2938771,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908512.954, "dur": 3.265, + "args": { + "External id": 2938772,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015908530.420, "dur": 24.557, + "args": { + "External id": 2938773,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908558.041, "dur": 17.571, + "args": { + "External id": 2938774,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015908579.587, "dur": 17.178, + "args": { + "External id": 2938775,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908598.971, "dur": 16.392, + "args": { + "External id": 2938776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015908620.105, "dur": 21.492, + "args": { + "External id": 2938777,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908622.237, "dur": 1.668, + "args": { + "External id": 2938778,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908626.042, "dur": 0.950, + "args": { + "External id": 2938779,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908645.061, "dur": 14.871, + "args": { + "External id": 2938780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908662.658, "dur": 10.482, + "args": { + "External id": 2938781,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015908681.926, "dur": 1.931, + "args": { + "External id": 2938782,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015908692.325, "dur": 4.153, + "args": { + "External id": 2938783,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908694.598, "dur": 1.053, + "args": { + "External id": 2938784,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015908777.891, "dur": 72.167, + "args": { + "External id": 2938785,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015908855.792, "dur": 4.861, + "args": { + "External id": 2938786,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908858.443, "dur": 0.955, + "args": { + "External id": 2938787,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908862.261, "dur": 31.446, + "args": { + "External id": 2938788,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015908899.486, "dur": 7.880, + "args": { + "External id": 2938789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015908901.043, "dur": 5.631, + "args": { + "External id": 2938790,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015908904.820, "dur": 1.591, + "args": { + "External id": 2938791,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015908911.402, "dur": 46.038, + "args": { + "External id": 2938792,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015908912.844, "dur": 43.992, + "args": { + "External id": 2938793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015908963.133, "dur": 16.583, + "args": { + "External id": 2938794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015909026.789, "dur": 5.157, + "args": { + "External id": 2938795,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909029.419, "dur": 1.144, + "args": { + "External id": 2938796,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015909039.180, "dur": 56.981, + "args": { + "External id": 2938797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015909040.021, "dur": 4.350, + "args": { + "External id": 2938798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015909041.114, "dur": 2.664, + "args": { + "External id": 2938799,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909042.632, "dur": 0.987, + "args": { + "External id": 2938800,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015909045.285, "dur": 50.368, + "args": { + "External id": 2938801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015909046.162, "dur": 48.893, + "args": { + "External id": 2938802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015909101.830, "dur": 5.225, + "args": { + "External id": 2938803,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909103.778, "dur": 2.210, + "args": { + "External id": 2938804,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015909114.500, "dur": 1.788, + "args": { + "External id": 2938805,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909124.665, "dur": 6.840, + "args": { + "External id": 2938806,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909126.933, "dur": 4.318, + "args": { + "External id": 2938807,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015909251.107, "dur": 187.194, + "args": { + "External id": 2938808,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909254.025, "dur": 3.124, + "args": { + "External id": 2938809,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015909259.258, "dur": 178.563, + "args": { + "External id": 2938810,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015909260.916, "dur": 0.417, + "args": { + "External id": 2938811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015909262.576, "dur": 24.854, + "args": { + "External id": 2938812,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015909289.175, "dur": 3.729, + "args": { + "External id": 2938813,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909291.309, "dur": 1.107, + "args": { + "External id": 2938814,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015909293.899, "dur": 25.369, + "args": { + "External id": 2938815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909295.131, "dur": 1.849, + "args": { + "External id": 2938816,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015909298.419, "dur": 20.476, + "args": { + "External id": 2938817,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909304.477, "dur": 3.263, + "args": { + "External id": 2938818,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015909320.829, "dur": 21.706, + "args": { + "External id": 2938819,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909344.326, "dur": 12.455, + "args": { + "External id": 2938820,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015909359.592, "dur": 14.928, + "args": { + "External id": 2938821,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909375.819, "dur": 11.945, + "args": { + "External id": 2938822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015909389.403, "dur": 19.767, + "args": { + "External id": 2938823,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909391.243, "dur": 1.664, + "args": { + "External id": 2938824,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909395.156, "dur": 0.545, + "args": { + "External id": 2938825,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909412.623, "dur": 11.881, + "args": { + "External id": 2938826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909425.630, "dur": 10.941, + "args": { + "External id": 2938827,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015909445.108, "dur": 2.199, + "args": { + "External id": 2938828,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015909456.359, "dur": 3.894, + "args": { + "External id": 2938829,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909458.884, "dur": 0.633, + "args": { + "External id": 2938830,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015909526.931, "dur": 57.453, + "args": { + "External id": 2938831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015909589.350, "dur": 4.606, + "args": { + "External id": 2938832,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909591.909, "dur": 1.055, + "args": { + "External id": 2938833,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909597.061, "dur": 25.961, + "args": { + "External id": 2938834,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015909627.893, "dur": 6.057, + "args": { + "External id": 2938835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015909629.826, "dur": 3.429, + "args": { + "External id": 2938836,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909631.813, "dur": 1.225, + "args": { + "External id": 2938837,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015909636.415, "dur": 42.190, + "args": { + "External id": 2938838,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015909637.877, "dur": 40.124, + "args": { + "External id": 2938839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909682.453, "dur": 14.377, + "args": { + "External id": 2938840,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015909703.850, "dur": 3.564, + "args": { + "External id": 2938841,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909705.874, "dur": 0.739, + "args": { + "External id": 2938842,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015909711.131, "dur": 52.985, + "args": { + "External id": 2938843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015909711.933, "dur": 5.738, + "args": { + "External id": 2938844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015909712.851, "dur": 4.152, + "args": { + "External id": 2938845,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909716.172, "dur": 0.694, + "args": { + "External id": 2938846,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015909720.264, "dur": 43.438, + "args": { + "External id": 2938847,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015909720.967, "dur": 42.074, + "args": { + "External id": 2938848,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015909768.124, "dur": 3.547, + "args": { + "External id": 2938849,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909770.026, "dur": 0.539, + "args": { + "External id": 2938850,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015909777.089, "dur": 1.490, + "args": { + "External id": 2938851,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909785.633, "dur": 5.499, + "args": { + "External id": 2938852,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909787.697, "dur": 3.150, + "args": { + "External id": 2938853,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015909877.762, "dur": 297.482, + "args": { + "External id": 2938854,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909880.168, "dur": 2.027, + "args": { + "External id": 2938855,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015909883.398, "dur": 291.300, + "args": { + "External id": 2938856,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015909886.423, "dur": 0.265, + "args": { + "External id": 2938857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015909888.128, "dur": 18.085, + "args": { + "External id": 2938858,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015909908.015, "dur": 2.899, + "args": { + "External id": 2938859,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015909910.051, "dur": 0.663, + "args": { + "External id": 2938860,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015909912.034, "dur": 23.204, + "args": { + "External id": 2938861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015909914.775, "dur": 1.261, + "args": { + "External id": 2938862,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015909917.181, "dur": 17.724, + "args": { + "External id": 2938863,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909921.359, "dur": 2.450, + "args": { + "External id": 2938864,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015909936.566, "dur": 18.043, + "args": { + "External id": 2938865,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015909956.116, "dur": 12.350, + "args": { + "External id": 2938866,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015909971.652, "dur": 62.293, + "args": { + "External id": 2938867,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910036.420, "dur": 15.630, + "args": { + "External id": 2938868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015910054.126, "dur": 36.001, + "args": { + "External id": 2938869,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910056.025, "dur": 2.179, + "args": { + "External id": 2938870,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910060.719, "dur": 0.991, + "args": { + "External id": 2938871,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910093.552, "dur": 38.108, + "args": { + "External id": 2938872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910133.039, "dur": 39.605, + "args": { + "External id": 2938873,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015910200.677, "dur": 4.053, + "args": { + "External id": 2938874,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015910218.408, "dur": 4.606, + "args": { + "External id": 2938875,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910221.517, "dur": 0.811, + "args": { + "External id": 2938876,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015910303.629, "dur": 62.905, + "args": { + "External id": 2938877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015910371.263, "dur": 11.192, + "args": { + "External id": 2938878,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910378.917, "dur": 2.456, + "args": { + "External id": 2938879,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910383.903, "dur": 24.180, + "args": { + "External id": 2938880,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015910444.582, "dur": 5.746, + "args": { + "External id": 2938881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015910446.482, "dur": 3.137, + "args": { + "External id": 2938882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910448.567, "dur": 0.819, + "args": { + "External id": 2938883,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015910453.091, "dur": 45.240, + "args": { + "External id": 2938884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015910454.351, "dur": 43.361, + "args": { + "External id": 2938885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910504.762, "dur": 15.333, + "args": { + "External id": 2938886,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015910525.559, "dur": 3.380, + "args": { + "External id": 2938887,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910527.367, "dur": 0.786, + "args": { + "External id": 2938888,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015910533.010, "dur": 49.966, + "args": { + "External id": 2938889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015910533.836, "dur": 5.381, + "args": { + "External id": 2938890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015910534.321, "dur": 4.334, + "args": { + "External id": 2938891,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910537.634, "dur": 0.718, + "args": { + "External id": 2938892,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015910539.949, "dur": 42.671, + "args": { + "External id": 2938893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015910540.595, "dur": 41.475, + "args": { + "External id": 2938894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015910586.792, "dur": 2.963, + "args": { + "External id": 2938895,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910588.123, "dur": 0.734, + "args": { + "External id": 2938896,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015910595.362, "dur": 1.223, + "args": { + "External id": 2938897,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015910605.739, "dur": 7.724, + "args": { + "External id": 2938898,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015910607.752, "dur": 5.396, + "args": { + "External id": 2938899,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015910695.944, "dur": 175.146, + "args": { + "External id": 2938900,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015910698.298, "dur": 1.977, + "args": { + "External id": 2938901,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015910701.897, "dur": 168.694, + "args": { + "External id": 2938902,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015910703.179, "dur": 0.347, + "args": { + "External id": 2938903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015910704.705, "dur": 22.691, + "args": { + "External id": 2938904,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015910730.824, "dur": 2.995, + "args": { + "External id": 2938905,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910732.871, "dur": 0.634, + "args": { + "External id": 2938906,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015910734.918, "dur": 22.371, + "args": { + "External id": 2938907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015910735.771, "dur": 1.593, + "args": { + "External id": 2938908,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015910738.466, "dur": 18.463, + "args": { + "External id": 2938909,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910743.014, "dur": 2.726, + "args": { + "External id": 2938910,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015910758.618, "dur": 20.218, + "args": { + "External id": 2938911,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910780.237, "dur": 12.398, + "args": { + "External id": 2938912,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015910795.461, "dur": 13.619, + "args": { + "External id": 2938913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910810.180, "dur": 11.995, + "args": { + "External id": 2938914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015910825.753, "dur": 18.188, + "args": { + "External id": 2938915,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910827.464, "dur": 1.421, + "args": { + "External id": 2938916,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910830.912, "dur": 0.663, + "args": { + "External id": 2938917,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910845.712, "dur": 11.499, + "args": { + "External id": 2938918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015910858.179, "dur": 11.290, + "args": { + "External id": 2938919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015910877.035, "dur": 1.676, + "args": { + "External id": 2938920,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015910886.671, "dur": 3.535, + "args": { + "External id": 2938921,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015910889.064, "dur": 0.459, + "args": { + "External id": 2938922,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015910948.885, "dur": 88.671, + "args": { + "External id": 2938923,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911044.164, "dur": 5.819, + "args": { + "External id": 2938924,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911047.213, "dur": 1.318, + "args": { + "External id": 2938925,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911051.452, "dur": 25.638, + "args": { + "External id": 2938926,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015911082.103, "dur": 6.567, + "args": { + "External id": 2938927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015911083.529, "dur": 4.542, + "args": { + "External id": 2938928,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911086.825, "dur": 0.993, + "args": { + "External id": 2938929,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015911091.648, "dur": 44.905, + "args": { + "External id": 2938930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015911092.719, "dur": 43.161, + "args": { + "External id": 2938931,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911140.120, "dur": 31.214, + "args": { + "External id": 2938932,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911179.226, "dur": 4.717, + "args": { + "External id": 2938933,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911181.784, "dur": 1.154, + "args": { + "External id": 2938934,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015911188.124, "dur": 53.404, + "args": { + "External id": 2938935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015911190.809, "dur": 5.219, + "args": { + "External id": 2938936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015911191.691, "dur": 3.777, + "args": { + "External id": 2938937,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911193.334, "dur": 1.930, + "args": { + "External id": 2938938,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015911196.671, "dur": 44.530, + "args": { + "External id": 2938939,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015911197.297, "dur": 43.265, + "args": { + "External id": 2938940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911245.555, "dur": 3.335, + "args": { + "External id": 2938941,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911247.235, "dur": 0.750, + "args": { + "External id": 2938942,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015911254.736, "dur": 1.471, + "args": { + "External id": 2938943,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911264.719, "dur": 6.633, + "args": { + "External id": 2938944,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911266.840, "dur": 4.182, + "args": { + "External id": 2938945,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015911353.455, "dur": 170.148, + "args": { + "External id": 2938946,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911355.708, "dur": 2.361, + "args": { + "External id": 2938947,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015911359.265, "dur": 163.906, + "args": { + "External id": 2938948,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015911360.723, "dur": 0.381, + "args": { + "External id": 2938949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015911365.904, "dur": 20.011, + "args": { + "External id": 2938950,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015911387.346, "dur": 3.095, + "args": { + "External id": 2938951,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911389.395, "dur": 0.804, + "args": { + "External id": 2938952,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015911391.435, "dur": 21.300, + "args": { + "External id": 2938953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911392.112, "dur": 2.910, + "args": { + "External id": 2938954,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015911396.505, "dur": 15.937, + "args": { + "External id": 2938955,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911398.911, "dur": 2.842, + "args": { + "External id": 2938956,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015911414.178, "dur": 19.616, + "args": { + "External id": 2938957,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911435.072, "dur": 12.583, + "args": { + "External id": 2938958,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015911450.262, "dur": 12.603, + "args": { + "External id": 2938959,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911464.142, "dur": 11.654, + "args": { + "External id": 2938960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015911479.216, "dur": 17.861, + "args": { + "External id": 2938961,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911481.036, "dur": 1.504, + "args": { + "External id": 2938962,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911484.190, "dur": 0.695, + "args": { + "External id": 2938963,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911498.676, "dur": 11.545, + "args": { + "External id": 2938964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911511.524, "dur": 10.596, + "args": { + "External id": 2938965,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015911531.253, "dur": 1.608, + "args": { + "External id": 2938966,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911540.756, "dur": 2.920, + "args": { + "External id": 2938967,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911542.602, "dur": 0.402, + "args": { + "External id": 2938968,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015911600.393, "dur": 48.236, + "args": { + "External id": 2938969,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911653.007, "dur": 4.351, + "args": { + "External id": 2938970,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911655.342, "dur": 1.017, + "args": { + "External id": 2938971,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911658.649, "dur": 20.904, + "args": { + "External id": 2938972,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015911683.800, "dur": 6.161, + "args": { + "External id": 2938973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015911684.855, "dur": 4.550, + "args": { + "External id": 2938974,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911688.333, "dur": 0.890, + "args": { + "External id": 2938975,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015911692.374, "dur": 42.178, + "args": { + "External id": 2938976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015911693.380, "dur": 40.489, + "args": { + "External id": 2938977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911738.183, "dur": 13.136, + "args": { + "External id": 2938978,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911756.187, "dur": 3.201, + "args": { + "External id": 2938979,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911757.944, "dur": 0.597, + "args": { + "External id": 2938980,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015911764.553, "dur": 47.828, + "args": { + "External id": 2938981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015911765.262, "dur": 5.153, + "args": { + "External id": 2938982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015911766.096, "dur": 3.808, + "args": { + "External id": 2938983,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911769.050, "dur": 0.707, + "args": { + "External id": 2938984,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015911771.095, "dur": 40.849, + "args": { + "External id": 2938985,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015911772.036, "dur": 39.445, + "args": { + "External id": 2938986,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015911817.576, "dur": 4.087, + "args": { + "External id": 2938987,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911819.720, "dur": 0.883, + "args": { + "External id": 2938988,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015911833.978, "dur": 1.491, + "args": { + "External id": 2938989,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911841.863, "dur": 5.316, + "args": { + "External id": 2938990,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911843.384, "dur": 3.525, + "args": { + "External id": 2938991,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015911919.159, "dur": 218.701, + "args": { + "External id": 2938992,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911923.111, "dur": 2.016, + "args": { + "External id": 2938993,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015911926.525, "dur": 210.907, + "args": { + "External id": 2938994,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015911932.717, "dur": 0.274, + "args": { + "External id": 2938995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015911935.160, "dur": 18.973, + "args": { + "External id": 2938996,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015911955.554, "dur": 4.495, + "args": { + "External id": 2938997,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015911958.896, "dur": 0.965, + "args": { + "External id": 2938998,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015911960.735, "dur": 20.393, + "args": { + "External id": 2938999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015911963.350, "dur": 1.627, + "args": { + "External id": 2939000,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015911965.892, "dur": 14.952, + "args": { + "External id": 2939001,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015911967.758, "dur": 2.240, + "args": { + "External id": 2939002,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015911982.253, "dur": 60.657, + "args": { + "External id": 2939003,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912045.272, "dur": 14.358, + "args": { + "External id": 2939004,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015912062.474, "dur": 12.912, + "args": { + "External id": 2939005,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912076.711, "dur": 11.360, + "args": { + "External id": 2939006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015912090.011, "dur": 21.211, + "args": { + "External id": 2939007,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912091.811, "dur": 1.627, + "args": { + "External id": 2939008,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912097.087, "dur": 0.864, + "args": { + "External id": 2939009,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912112.918, "dur": 10.937, + "args": { + "External id": 2939010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912124.943, "dur": 11.265, + "args": { + "External id": 2939011,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015912159.473, "dur": 2.397, + "args": { + "External id": 2939012,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015912172.302, "dur": 3.840, + "args": { + "External id": 2939013,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912174.711, "dur": 0.707, + "args": { + "External id": 2939014,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015912241.340, "dur": 59.454, + "args": { + "External id": 2939015,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015912305.572, "dur": 6.167, + "args": { + "External id": 2939016,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912309.609, "dur": 0.885, + "args": { + "External id": 2939017,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912313.231, "dur": 24.961, + "args": { + "External id": 2939018,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015912342.895, "dur": 5.344, + "args": { + "External id": 2939019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015912344.338, "dur": 3.208, + "args": { + "External id": 2939020,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912346.158, "dur": 1.176, + "args": { + "External id": 2939021,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015912350.849, "dur": 42.764, + "args": { + "External id": 2939022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015912353.681, "dur": 39.246, + "args": { + "External id": 2939023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912397.500, "dur": 14.705, + "args": { + "External id": 2939024,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015912417.550, "dur": 3.636, + "args": { + "External id": 2939025,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912419.447, "dur": 0.895, + "args": { + "External id": 2939026,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015912425.013, "dur": 47.356, + "args": { + "External id": 2939027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015912425.816, "dur": 4.997, + "args": { + "External id": 2939028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015912426.871, "dur": 3.478, + "args": { + "External id": 2939029,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912429.618, "dur": 0.599, + "args": { + "External id": 2939030,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015912431.537, "dur": 40.440, + "args": { + "External id": 2939031,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015912432.213, "dur": 39.269, + "args": { + "External id": 2939032,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015912476.726, "dur": 3.969, + "args": { + "External id": 2939033,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912478.533, "dur": 1.009, + "args": { + "External id": 2939034,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015912486.074, "dur": 1.410, + "args": { + "External id": 2939035,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015912494.554, "dur": 9.029, + "args": { + "External id": 2939036,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015912498.114, "dur": 5.176, + "args": { + "External id": 2939037,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015912583.904, "dur": 174.721, + "args": { + "External id": 2939038,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015912586.015, "dur": 2.151, + "args": { + "External id": 2939039,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015912589.885, "dur": 168.041, + "args": { + "External id": 2939040,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015912591.263, "dur": 0.342, + "args": { + "External id": 2939041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015912592.467, "dur": 21.180, + "args": { + "External id": 2939042,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015912615.174, "dur": 3.460, + "args": { + "External id": 2939043,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912617.276, "dur": 1.101, + "args": { + "External id": 2939044,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015912619.820, "dur": 24.229, + "args": { + "External id": 2939045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015912622.476, "dur": 1.803, + "args": { + "External id": 2939046,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015912625.508, "dur": 18.237, + "args": { + "External id": 2939047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912629.483, "dur": 2.916, + "args": { + "External id": 2939048,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015912645.390, "dur": 18.460, + "args": { + "External id": 2939049,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912665.300, "dur": 12.667, + "args": { + "External id": 2939050,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015912680.876, "dur": 12.406, + "args": { + "External id": 2939051,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912694.492, "dur": 11.237, + "args": { + "External id": 2939052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015912707.180, "dur": 23.835, + "args": { + "External id": 2939053,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912708.851, "dur": 1.354, + "args": { + "External id": 2939054,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912717.192, "dur": 0.694, + "args": { + "External id": 2939055,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912732.615, "dur": 11.589, + "args": { + "External id": 2939056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912745.107, "dur": 11.658, + "args": { + "External id": 2939057,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015912764.475, "dur": 1.670, + "args": { + "External id": 2939058,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015912773.491, "dur": 2.953, + "args": { + "External id": 2939059,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912775.257, "dur": 0.437, + "args": { + "External id": 2939060,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015912834.208, "dur": 46.644, + "args": { + "External id": 2939061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015912886.461, "dur": 4.316, + "args": { + "External id": 2939062,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912888.659, "dur": 1.069, + "args": { + "External id": 2939063,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912891.790, "dur": 21.020, + "args": { + "External id": 2939064,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015912916.370, "dur": 4.450, + "args": { + "External id": 2939065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015912917.639, "dur": 2.646, + "args": { + "External id": 2939066,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015912919.249, "dur": 0.879, + "args": { + "External id": 2939067,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015912924.548, "dur": 39.753, + "args": { + "External id": 2939068,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015912925.358, "dur": 38.254, + "args": { + "External id": 2939069,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015912967.784, "dur": 13.223, + "args": { + "External id": 2939070,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913023.671, "dur": 5.302, + "args": { + "External id": 2939071,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913026.480, "dur": 1.163, + "args": { + "External id": 2939072,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015913033.991, "dur": 56.381, + "args": { + "External id": 2939073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015913034.748, "dur": 7.015, + "args": { + "External id": 2939074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015913037.564, "dur": 3.716, + "args": { + "External id": 2939075,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913038.980, "dur": 2.108, + "args": { + "External id": 2939076,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015913042.461, "dur": 47.539, + "args": { + "External id": 2939077,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015913043.326, "dur": 46.039, + "args": { + "External id": 2939078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913094.440, "dur": 3.896, + "args": { + "External id": 2939079,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913096.365, "dur": 0.947, + "args": { + "External id": 2939080,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015913105.949, "dur": 1.447, + "args": { + "External id": 2939081,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015913114.277, "dur": 6.073, + "args": { + "External id": 2939082,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015913116.020, "dur": 4.078, + "args": { + "External id": 2939083,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015913222.437, "dur": 174.307, + "args": { + "External id": 2939084,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015913224.867, "dur": 2.857, + "args": { + "External id": 2939085,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015913229.525, "dur": 166.747, + "args": { + "External id": 2939086,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015913232.251, "dur": 0.363, + "args": { + "External id": 2939087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015913235.430, "dur": 21.671, + "args": { + "External id": 2939088,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015913258.767, "dur": 3.228, + "args": { + "External id": 2939089,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913260.817, "dur": 0.892, + "args": { + "External id": 2939090,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015913262.729, "dur": 21.002, + "args": { + "External id": 2939091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015913263.616, "dur": 2.802, + "args": { + "External id": 2939092,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015913267.588, "dur": 15.843, + "args": { + "External id": 2939093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913270.275, "dur": 2.644, + "args": { + "External id": 2939094,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015913284.984, "dur": 19.517, + "args": { + "External id": 2939095,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913305.939, "dur": 12.260, + "args": { + "External id": 2939096,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015913322.389, "dur": 13.217, + "args": { + "External id": 2939097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913336.789, "dur": 11.224, + "args": { + "External id": 2939098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015913349.686, "dur": 18.558, + "args": { + "External id": 2939099,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913351.433, "dur": 1.705, + "args": { + "External id": 2939100,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913354.929, "dur": 0.979, + "args": { + "External id": 2939101,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913370.186, "dur": 11.709, + "args": { + "External id": 2939102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913384.630, "dur": 10.381, + "args": { + "External id": 2939103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015913403.000, "dur": 1.810, + "args": { + "External id": 2939104,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913412.854, "dur": 3.345, + "args": { + "External id": 2939105,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913414.949, "dur": 0.507, + "args": { + "External id": 2939106,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015913475.911, "dur": 51.731, + "args": { + "External id": 2939107,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913532.315, "dur": 4.381, + "args": { + "External id": 2939108,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913534.594, "dur": 1.062, + "args": { + "External id": 2939109,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913537.904, "dur": 22.487, + "args": { + "External id": 2939110,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015913564.501, "dur": 7.707, + "args": { + "External id": 2939111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015913567.704, "dur": 3.919, + "args": { + "External id": 2939112,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913569.216, "dur": 2.213, + "args": { + "External id": 2939113,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015913574.809, "dur": 39.669, + "args": { + "External id": 2939114,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015913575.743, "dur": 38.055, + "args": { + "External id": 2939115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913618.088, "dur": 13.145, + "args": { + "External id": 2939116,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015913635.559, "dur": 22.699, + "args": { + "External id": 2939117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015913637.786, "dur": 20.016, + "args": { + "External id": 2939118,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913642.410, "dur": 0.857, + "args": { + "External id": 2939119,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015913664.978, "dur": 26.370, + "args": { + "External id": 2939120,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015913666.997, "dur": 24.150, + "args": { + "External id": 2939121,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913671.168, "dur": 4.183, + "args": { + "External id": 2939122,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913676.408, "dur": 14.318, + "args": { + "External id": 2939123,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015913705.815, "dur": 5.059, + "args": { + "External id": 2939124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015913707.944, "dur": 2.691, + "args": { + "External id": 2939125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015913711.955, "dur": 1.273, + "args": { + "External id": 2939126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015913712.629, "dur": 0.519, + "args": { + "External id": 2939127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913754.181, "dur": 21.413, + "args": { + "External id": 2939128,"Sequence number": 29294765, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "s", "id": 23, "pid": 1336755, "tid": 1336755, "ts": 1555015913754.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913780.847, "dur": 5.100, + "args": { + "External id": 2939129,"Sequence number": 29294766, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913783.791, "dur": 0.969, + "args": { + "External id": 2939130,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555015913788.432, "dur": 6.191, + "args": { + "External id": 2939131,"Sequence number": 29294766, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913792.472, "dur": 0.921, + "args": { + "External id": 2939132,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913795.928, "dur": 4.548, + "args": { + "External id": 2939133,"Sequence number": 29294766, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913798.808, "dur": 1.063, + "args": { + "External id": 2939134,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913804.374, "dur": 5.791, + "args": { + "External id": 2939135,"Sequence number": 29294766, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "s", "id": 22, "pid": 1336755, "tid": 1336755, "ts": 1555015913804.374, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913807.485, "dur": 1.181, + "args": { + "External id": 2939136,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913811.155, "dur": 4.773, + "args": { + "External id": 2939137,"Sequence number": 29294767, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9112 + } + }, + { + "ph": "s", "id": 21, "pid": 1336755, "tid": 1336755, "ts": 1555015913811.155, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913814.454, "dur": 0.631, + "args": { + "External id": 2939138,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555015913818.321, "dur": 5.587, + "args": { + "External id": 2939139,"Sequence number": 29294768, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "s", "id": 20, "pid": 1336755, "tid": 1336755, "ts": 1555015913818.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913822.098, "dur": 0.934, + "args": { + "External id": 2939140,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015913824.941, "dur": 4.684, + "args": { + "External id": 2939141,"Sequence number": 29294769, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 9116 + } + }, + { + "ph": "s", "id": 19, "pid": 1336755, "tid": 1336755, "ts": 1555015913824.941, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913827.598, "dur": 1.111, + "args": { + "External id": 2939142,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015913833.361, "dur": 26.514, + "args": { + "External id": 2939143,"Sequence number": 29294770, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015913835.044, "dur": 24.633, + "args": { + "External id": 2939144,"Sequence number": 29294770, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015913837.801, "dur": 5.611, + "args": { + "External id": 2939145,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015913839.741, "dur": 3.146, + "args": { + "External id": 2939146,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913844.162, "dur": 15.087, + "args": { + "External id": 2939147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015913884.766, "dur": 3.414, + "args": { + "External id": 2939148,"Sequence number": 29294770, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 9123 + } + }, + { + "ph": "s", "id": 18, "pid": 1336755, "tid": 1336755, "ts": 1555015913884.766, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015913890.371, "dur": 1.140, + "args": { + "External id": 2939149,"Sequence number": 29294771, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015913923.921, "dur": 41928.452, + "args": { + "External id": 2939150,"Sequence number": 29294771, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "s", "id": 17, "pid": 1336755, "tid": 1336755, "ts": 1555015913923.921, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015913937.625, "dur": 25.665, + "args": { + "External id": 2939151,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015913938.523, "dur": 24.523, + "args": { + "External id": 2939152,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015913939.709, "dur": 6.333, + "args": { + "External id": 2939153,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015913942.488, "dur": 3.155, + "args": { + "External id": 2939154,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015913946.702, "dur": 15.910, + "args": { + "External id": 2939155,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015913978.659, "dur": 69.041, + "args": { + "External id": 2939156,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015913980.073, "dur": 47.493, + "args": { + "External id": 2939157,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015913981.652, "dur": 45.169, + "args": { + "External id": 2939158,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914029.331, "dur": 18.148, + "args": { + "External id": 2939159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914031.540, "dur": 15.556, + "args": { + "External id": 2939160,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015914052.225, "dur": 20.024, + "args": { + "External id": 2939161,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015914052.936, "dur": 5.185, + "args": { + "External id": 2939162,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015914054.468, "dur": 3.393, + "args": { + "External id": 2939163,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914058.678, "dur": 13.394, + "args": { + "External id": 2939164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914059.388, "dur": 12.312, + "args": { + "External id": 2939165,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555015914080.047, "dur": 18.182, + "args": { + "External id": 2939166,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015914082.049, "dur": 3.088, + "args": { + "External id": 2939167,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914085.693, "dur": 12.247, + "args": { + "External id": 2939168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914088.138, "dur": 9.495, + "args": { + "External id": 2939169,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1336755, + "ts": 1555015914102.569, "dur": 21.675, + "args": { + "External id": 2939170,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015914127.106, "dur": 85.893, + "args": { + "External id": 2939171,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015914159.387, "dur": 53.132, + "args": { + "External id": 2939172,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015914166.601, "dur": 2.639, + "args": { + "External id": 2939173,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015914170.635, "dur": 25.929, + "args": { + "External id": 2939174,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015914172.147, "dur": 24.218, + "args": { + "External id": 2939175,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015914174.413, "dur": 2.838, + "args": { + "External id": 2939176,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015914179.771, "dur": 16.252, + "args": { + "External id": 2939177,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1336755, + "ts": 1555015914217.493, "dur": 36158.563, + "args": { + "External id": 2939178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1336755, + "ts": 1555015914219.005, "dur": 36156.346, + "args": { + "External id": 2939179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015950385.947, "dur": 6.268, + "args": { + "External id": 2939180,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015950389.220, "dur": 1.255, + "args": { + "External id": 2939181,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015950397.096, "dur": 103.836, + "args": { + "External id": 2939182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015950398.740, "dur": 8.465, + "args": { + "External id": 2939183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015950403.242, "dur": 3.081, + "args": { + "External id": 2939184,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015950404.976, "dur": 1.052, + "args": { + "External id": 2939185,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015950408.500, "dur": 91.876, + "args": { + "External id": 2939186,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015950410.160, "dur": 89.474, + "args": { + "External id": 2939187,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015950504.551, "dur": 4.163, + "args": { + "External id": 2939188,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015950506.502, "dur": 0.861, + "args": { + "External id": 2939189,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015950515.973, "dur": 2.226, + "args": { + "External id": 2939190,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015950529.853, "dur": 6.269, + "args": { + "External id": 2939191,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015950531.884, "dur": 3.965, + "args": { + "External id": 2939192,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015950662.277, "dur": 192.612, + "args": { + "External id": 2939193,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015950667.090, "dur": 2.002, + "args": { + "External id": 2939194,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015950670.561, "dur": 183.772, + "args": { + "External id": 2939195,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015950672.201, "dur": 0.614, + "args": { + "External id": 2939196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015950675.967, "dur": 24.245, + "args": { + "External id": 2939197,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015950701.834, "dur": 5.106, + "args": { + "External id": 2939198,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015950705.696, "dur": 0.931, + "args": { + "External id": 2939199,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015950707.882, "dur": 22.537, + "args": { + "External id": 2939200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015950708.876, "dur": 1.392, + "args": { + "External id": 2939201,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015950711.563, "dur": 18.601, + "args": { + "External id": 2939202,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015950715.148, "dur": 3.324, + "args": { + "External id": 2939203,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015950732.125, "dur": 21.355, + "args": { + "External id": 2939204,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015950755.121, "dur": 17.040, + "args": { + "External id": 2939205,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015950775.456, "dur": 14.160, + "args": { + "External id": 2939206,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015950792.717, "dur": 12.399, + "args": { + "External id": 2939207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015950807.050, "dur": 19.924, + "args": { + "External id": 2939208,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015950808.828, "dur": 1.612, + "args": { + "External id": 2939209,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015950812.547, "dur": 0.833, + "args": { + "External id": 2939210,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015950828.806, "dur": 12.308, + "args": { + "External id": 2939211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015950842.369, "dur": 10.786, + "args": { + "External id": 2939212,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015950863.185, "dur": 1.667, + "args": { + "External id": 2939213,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015950871.292, "dur": 4.017, + "args": { + "External id": 2939214,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015950873.592, "dur": 0.723, + "args": { + "External id": 2939215,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015950946.703, "dur": 101.178, + "args": { + "External id": 2939216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951055.673, "dur": 6.973, + "args": { + "External id": 2939217,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951058.801, "dur": 0.901, + "args": { + "External id": 2939218,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951064.252, "dur": 33.181, + "args": { + "External id": 2939219,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015951103.491, "dur": 7.653, + "args": { + "External id": 2939220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015951106.788, "dur": 3.590, + "args": { + "External id": 2939221,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951108.673, "dur": 1.442, + "args": { + "External id": 2939222,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015951114.829, "dur": 64.229, + "args": { + "External id": 2939223,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015951116.256, "dur": 61.751, + "args": { + "External id": 2939224,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951184.895, "dur": 16.699, + "args": { + "External id": 2939225,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951208.152, "dur": 6.279, + "args": { + "External id": 2939226,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951212.278, "dur": 1.080, + "args": { + "External id": 2939227,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015951218.874, "dur": 58.421, + "args": { + "External id": 2939228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015951219.674, "dur": 4.317, + "args": { + "External id": 2939229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015951220.905, "dur": 2.483, + "args": { + "External id": 2939230,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951222.463, "dur": 0.760, + "args": { + "External id": 2939231,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015951224.837, "dur": 52.052, + "args": { + "External id": 2939232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015951229.773, "dur": 46.635, + "args": { + "External id": 2939233,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951283.001, "dur": 3.913, + "args": { + "External id": 2939234,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951284.702, "dur": 0.887, + "args": { + "External id": 2939235,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015951293.827, "dur": 1.771, + "args": { + "External id": 2939236,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015951302.978, "dur": 7.929, + "args": { + "External id": 2939237,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015951305.340, "dur": 5.240, + "args": { + "External id": 2939238,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015951405.921, "dur": 180.128, + "args": { + "External id": 2939239,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015951408.266, "dur": 2.206, + "args": { + "External id": 2939240,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015951411.851, "dur": 173.656, + "args": { + "External id": 2939241,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015951413.142, "dur": 0.342, + "args": { + "External id": 2939242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015951414.582, "dur": 23.869, + "args": { + "External id": 2939243,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015951442.401, "dur": 3.410, + "args": { + "External id": 2939244,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951444.520, "dur": 1.038, + "args": { + "External id": 2939245,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015951446.900, "dur": 24.357, + "args": { + "External id": 2939246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015951449.315, "dur": 3.179, + "args": { + "External id": 2939247,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015951453.742, "dur": 17.177, + "args": { + "External id": 2939248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951456.477, "dur": 2.969, + "args": { + "External id": 2939249,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015951472.589, "dur": 21.146, + "args": { + "External id": 2939250,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951495.046, "dur": 12.573, + "args": { + "External id": 2939251,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015951510.772, "dur": 13.880, + "args": { + "External id": 2939252,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951525.703, "dur": 11.500, + "args": { + "External id": 2939253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015951539.056, "dur": 20.228, + "args": { + "External id": 2939254,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951540.738, "dur": 1.507, + "args": { + "External id": 2939255,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951545.741, "dur": 1.066, + "args": { + "External id": 2939256,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951560.892, "dur": 11.482, + "args": { + "External id": 2939257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951573.426, "dur": 10.841, + "args": { + "External id": 2939258,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015951593.083, "dur": 1.716, + "args": { + "External id": 2939259,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951603.379, "dur": 3.375, + "args": { + "External id": 2939260,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951605.607, "dur": 0.395, + "args": { + "External id": 2939261,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015951671.200, "dur": 50.730, + "args": { + "External id": 2939262,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951726.806, "dur": 6.364, + "args": { + "External id": 2939263,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951730.378, "dur": 1.806, + "args": { + "External id": 2939264,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951734.476, "dur": 22.612, + "args": { + "External id": 2939265,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015951761.466, "dur": 6.555, + "args": { + "External id": 2939266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015951762.817, "dur": 4.368, + "args": { + "External id": 2939267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951764.676, "dur": 2.245, + "args": { + "External id": 2939268,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015951770.672, "dur": 42.327, + "args": { + "External id": 2939269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015951773.556, "dur": 38.673, + "args": { + "External id": 2939270,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015951816.863, "dur": 14.393, + "args": { + "External id": 2939271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951836.501, "dur": 3.227, + "args": { + "External id": 2939272,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951838.241, "dur": 0.589, + "args": { + "External id": 2939273,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015951843.165, "dur": 47.252, + "args": { + "External id": 2939274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015951843.896, "dur": 4.807, + "args": { + "External id": 2939275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015951844.693, "dur": 3.512, + "args": { + "External id": 2939276,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951847.169, "dur": 0.867, + "args": { + "External id": 2939277,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015951849.367, "dur": 40.686, + "args": { + "External id": 2939278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015951850.125, "dur": 39.379, + "args": { + "External id": 2939279,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015951894.688, "dur": 3.322, + "args": { + "External id": 2939280,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015951896.458, "dur": 0.566, + "args": { + "External id": 2939281,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015951903.141, "dur": 1.341, + "args": { + "External id": 2939282,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015951911.397, "dur": 7.120, + "args": { + "External id": 2939283,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015951914.577, "dur": 3.606, + "args": { + "External id": 2939284,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015952035.366, "dur": 198.327, + "args": { + "External id": 2939285,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015952037.693, "dur": 3.139, + "args": { + "External id": 2939286,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015952044.020, "dur": 189.258, + "args": { + "External id": 2939287,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015952045.423, "dur": 0.456, + "args": { + "External id": 2939288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015952046.864, "dur": 25.616, + "args": { + "External id": 2939289,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015952074.373, "dur": 4.706, + "args": { + "External id": 2939290,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952076.359, "dur": 2.368, + "args": { + "External id": 2939291,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015952079.983, "dur": 21.732, + "args": { + "External id": 2939292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015952082.419, "dur": 1.329, + "args": { + "External id": 2939293,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015952084.749, "dur": 16.623, + "args": { + "External id": 2939294,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952087.411, "dur": 2.630, + "args": { + "External id": 2939295,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015952103.233, "dur": 20.190, + "args": { + "External id": 2939296,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952124.792, "dur": 12.292, + "args": { + "External id": 2939297,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015952139.951, "dur": 29.287, + "args": { + "External id": 2939298,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952171.129, "dur": 12.418, + "args": { + "External id": 2939299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015952185.281, "dur": 21.266, + "args": { + "External id": 2939300,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952187.305, "dur": 1.966, + "args": { + "External id": 2939301,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952192.615, "dur": 1.118, + "args": { + "External id": 2939302,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952208.013, "dur": 11.889, + "args": { + "External id": 2939303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952221.114, "dur": 10.717, + "args": { + "External id": 2939304,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015952241.421, "dur": 2.045, + "args": { + "External id": 2939305,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015952252.770, "dur": 3.586, + "args": { + "External id": 2939306,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952255.088, "dur": 0.518, + "args": { + "External id": 2939307,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015952325.350, "dur": 84.075, + "args": { + "External id": 2939308,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015952415.843, "dur": 6.131, + "args": { + "External id": 2939309,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952418.460, "dur": 2.406, + "args": { + "External id": 2939310,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952423.432, "dur": 27.887, + "args": { + "External id": 2939311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015952455.745, "dur": 4.728, + "args": { + "External id": 2939312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015952457.130, "dur": 2.601, + "args": { + "External id": 2939313,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952458.629, "dur": 0.885, + "args": { + "External id": 2939314,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015952464.999, "dur": 80.829, + "args": { + "External id": 2939315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015952466.110, "dur": 79.158, + "args": { + "External id": 2939316,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952549.635, "dur": 23.916, + "args": { + "External id": 2939317,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015952579.163, "dur": 3.352, + "args": { + "External id": 2939318,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952581.084, "dur": 0.524, + "args": { + "External id": 2939319,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015952586.226, "dur": 49.796, + "args": { + "External id": 2939320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015952587.057, "dur": 4.669, + "args": { + "External id": 2939321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015952589.448, "dur": 1.814, + "args": { + "External id": 2939322,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952590.513, "dur": 0.583, + "args": { + "External id": 2939323,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015952592.335, "dur": 43.358, + "args": { + "External id": 2939324,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015952592.850, "dur": 42.342, + "args": { + "External id": 2939325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015952639.991, "dur": 3.351, + "args": { + "External id": 2939326,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952641.702, "dur": 0.567, + "args": { + "External id": 2939327,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015952650.310, "dur": 1.289, + "args": { + "External id": 2939328,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015952658.713, "dur": 5.852, + "args": { + "External id": 2939329,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015952660.412, "dur": 3.853, + "args": { + "External id": 2939330,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015952742.665, "dur": 168.488, + "args": { + "External id": 2939331,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015952744.511, "dur": 3.563, + "args": { + "External id": 2939332,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015952749.631, "dur": 160.982, + "args": { + "External id": 2939333,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015952752.417, "dur": 0.366, + "args": { + "External id": 2939334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015952753.883, "dur": 18.483, + "args": { + "External id": 2939335,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015952774.126, "dur": 4.932, + "args": { + "External id": 2939336,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952777.865, "dur": 0.922, + "args": { + "External id": 2939337,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015952779.982, "dur": 19.525, + "args": { + "External id": 2939338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015952780.827, "dur": 1.253, + "args": { + "External id": 2939339,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015952783.138, "dur": 16.109, + "args": { + "External id": 2939340,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952785.066, "dur": 2.409, + "args": { + "External id": 2939341,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015952800.662, "dur": 18.221, + "args": { + "External id": 2939342,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952820.363, "dur": 11.807, + "args": { + "External id": 2939343,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015952836.574, "dur": 13.534, + "args": { + "External id": 2939344,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952851.059, "dur": 11.040, + "args": { + "External id": 2939345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015952863.781, "dur": 20.007, + "args": { + "External id": 2939346,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952867.696, "dur": 1.562, + "args": { + "External id": 2939347,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952871.175, "dur": 0.850, + "args": { + "External id": 2939348,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952885.180, "dur": 11.228, + "args": { + "External id": 2939349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015952898.968, "dur": 10.440, + "args": { + "External id": 2939350,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015952916.790, "dur": 1.559, + "args": { + "External id": 2939351,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015952926.096, "dur": 2.849, + "args": { + "External id": 2939352,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015952927.977, "dur": 0.328, + "args": { + "External id": 2939353,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015953036.234, "dur": 58.421, + "args": { + "External id": 2939354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953100.221, "dur": 5.494, + "args": { + "External id": 2939355,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953103.092, "dur": 1.293, + "args": { + "External id": 2939356,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953107.014, "dur": 24.464, + "args": { + "External id": 2939357,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015953136.158, "dur": 19.954, + "args": { + "External id": 2939358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015953139.233, "dur": 15.809, + "args": { + "External id": 2939359,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953140.769, "dur": 13.601, + "args": { + "External id": 2939360,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015953159.999, "dur": 46.325, + "args": { + "External id": 2939361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015953161.236, "dur": 44.403, + "args": { + "External id": 2939362,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953210.673, "dur": 15.186, + "args": { + "External id": 2939363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953232.178, "dur": 5.796, + "args": { + "External id": 2939364,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953236.123, "dur": 0.911, + "args": { + "External id": 2939365,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015953241.695, "dur": 46.383, + "args": { + "External id": 2939366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015953242.452, "dur": 3.464, + "args": { + "External id": 2939367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015953243.077, "dur": 2.388, + "args": { + "External id": 2939368,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953244.530, "dur": 0.786, + "args": { + "External id": 2939369,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015953246.338, "dur": 41.316, + "args": { + "External id": 2939370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015953246.978, "dur": 40.151, + "args": { + "External id": 2939371,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953292.365, "dur": 3.443, + "args": { + "External id": 2939372,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953294.033, "dur": 0.591, + "args": { + "External id": 2939373,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015953303.778, "dur": 1.565, + "args": { + "External id": 2939374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015953312.538, "dur": 6.842, + "args": { + "External id": 2939375,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015953314.298, "dur": 4.822, + "args": { + "External id": 2939376,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015953402.066, "dur": 213.127, + "args": { + "External id": 2939377,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015953405.866, "dur": 2.295, + "args": { + "External id": 2939378,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015953411.278, "dur": 203.258, + "args": { + "External id": 2939379,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015953412.712, "dur": 0.473, + "args": { + "External id": 2939380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015953414.162, "dur": 19.585, + "args": { + "External id": 2939381,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015953435.601, "dur": 4.708, + "args": { + "External id": 2939382,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953439.127, "dur": 0.943, + "args": { + "External id": 2939383,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015953441.177, "dur": 19.854, + "args": { + "External id": 2939384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015953442.468, "dur": 1.455, + "args": { + "External id": 2939385,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015953445.201, "dur": 15.558, + "args": { + "External id": 2939386,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953447.206, "dur": 2.573, + "args": { + "External id": 2939387,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015953462.286, "dur": 20.268, + "args": { + "External id": 2939388,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953484.099, "dur": 12.176, + "args": { + "External id": 2939389,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015953500.484, "dur": 12.783, + "args": { + "External id": 2939390,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953514.400, "dur": 12.018, + "args": { + "External id": 2939391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015953527.968, "dur": 58.639, + "args": { + "External id": 2939392,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953564.372, "dur": 6.512, + "args": { + "External id": 2939393,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953572.674, "dur": 0.641, + "args": { + "External id": 2939394,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953589.871, "dur": 11.216, + "args": { + "External id": 2939395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953602.066, "dur": 11.324, + "args": { + "External id": 2939396,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015953621.812, "dur": 1.752, + "args": { + "External id": 2939397,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953631.025, "dur": 3.230, + "args": { + "External id": 2939398,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953633.100, "dur": 0.389, + "args": { + "External id": 2939399,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015953697.628, "dur": 53.344, + "args": { + "External id": 2939400,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953755.436, "dur": 4.091, + "args": { + "External id": 2939401,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953757.558, "dur": 0.893, + "args": { + "External id": 2939402,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953762.796, "dur": 23.470, + "args": { + "External id": 2939403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015953790.157, "dur": 6.095, + "args": { + "External id": 2939404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015953791.426, "dur": 4.039, + "args": { + "External id": 2939405,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953792.927, "dur": 2.259, + "args": { + "External id": 2939406,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015953798.674, "dur": 39.562, + "args": { + "External id": 2939407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015953799.690, "dur": 37.800, + "args": { + "External id": 2939408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015953842.101, "dur": 13.898, + "args": { + "External id": 2939409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953863.072, "dur": 3.453, + "args": { + "External id": 2939410,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953864.863, "dur": 0.960, + "args": { + "External id": 2939411,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015953870.149, "dur": 47.444, + "args": { + "External id": 2939412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015953870.692, "dur": 3.166, + "args": { + "External id": 2939413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015953871.342, "dur": 1.806, + "args": { + "External id": 2939414,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953872.338, "dur": 0.670, + "args": { + "External id": 2939415,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015953876.084, "dur": 41.081, + "args": { + "External id": 2939416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015953876.542, "dur": 40.046, + "args": { + "External id": 2939417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015953921.587, "dur": 3.813, + "args": { + "External id": 2939418,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015953923.518, "dur": 0.795, + "args": { + "External id": 2939419,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015953930.125, "dur": 1.225, + "args": { + "External id": 2939420,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015953939.022, "dur": 7.565, + "args": { + "External id": 2939421,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015953942.000, "dur": 4.297, + "args": { + "External id": 2939422,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015954068.014, "dur": 206.506, + "args": { + "External id": 2939423,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015954070.403, "dur": 3.008, + "args": { + "External id": 2939424,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015954076.976, "dur": 197.011, + "args": { + "External id": 2939425,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015954078.362, "dur": 0.379, + "args": { + "External id": 2939426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015954079.722, "dur": 27.701, + "args": { + "External id": 2939427,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015954108.792, "dur": 4.861, + "args": { + "External id": 2939428,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954110.683, "dur": 2.644, + "args": { + "External id": 2939429,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015954116.389, "dur": 22.449, + "args": { + "External id": 2939430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015954117.523, "dur": 1.650, + "args": { + "External id": 2939431,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015954120.339, "dur": 18.158, + "args": { + "External id": 2939432,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954123.067, "dur": 2.509, + "args": { + "External id": 2939433,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015954140.102, "dur": 37.622, + "args": { + "External id": 2939434,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954180.071, "dur": 13.077, + "args": { + "External id": 2939435,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015954195.999, "dur": 13.596, + "args": { + "External id": 2939436,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954210.923, "dur": 11.851, + "args": { + "External id": 2939437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015954224.682, "dur": 23.292, + "args": { + "External id": 2939438,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954228.353, "dur": 1.760, + "args": { + "External id": 2939439,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954231.986, "dur": 2.792, + "args": { + "External id": 2939440,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954249.439, "dur": 11.592, + "args": { + "External id": 2939441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954262.082, "dur": 10.747, + "args": { + "External id": 2939442,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015954282.513, "dur": 2.138, + "args": { + "External id": 2939443,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015954294.204, "dur": 3.412, + "args": { + "External id": 2939444,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954296.162, "dur": 0.473, + "args": { + "External id": 2939445,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015954363.210, "dur": 57.395, + "args": { + "External id": 2939446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015954427.518, "dur": 4.134, + "args": { + "External id": 2939447,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954429.763, "dur": 0.848, + "args": { + "External id": 2939448,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954433.091, "dur": 25.363, + "args": { + "External id": 2939449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015954462.749, "dur": 6.008, + "args": { + "External id": 2939450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015954464.503, "dur": 3.477, + "args": { + "External id": 2939451,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954466.234, "dur": 1.506, + "args": { + "External id": 2939452,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015954473.125, "dur": 40.331, + "args": { + "External id": 2939453,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015954474.185, "dur": 38.599, + "args": { + "External id": 2939454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954516.949, "dur": 13.506, + "args": { + "External id": 2939455,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015954535.876, "dur": 3.604, + "args": { + "External id": 2939456,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954537.844, "dur": 0.807, + "args": { + "External id": 2939457,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015954542.991, "dur": 48.105, + "args": { + "External id": 2939458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015954543.728, "dur": 4.909, + "args": { + "External id": 2939459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015954546.400, "dur": 1.788, + "args": { + "External id": 2939460,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954547.398, "dur": 0.676, + "args": { + "External id": 2939461,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015954549.276, "dur": 41.431, + "args": { + "External id": 2939462,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015954549.748, "dur": 40.484, + "args": { + "External id": 2939463,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015954595.309, "dur": 3.344, + "args": { + "External id": 2939464,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954597.039, "dur": 0.531, + "args": { + "External id": 2939465,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015954603.665, "dur": 1.361, + "args": { + "External id": 2939466,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015954613.803, "dur": 6.147, + "args": { + "External id": 2939467,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015954615.686, "dur": 3.997, + "args": { + "External id": 2939468,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015954696.640, "dur": 172.769, + "args": { + "External id": 2939469,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015954700.401, "dur": 1.997, + "args": { + "External id": 2939470,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015954703.728, "dur": 165.203, + "args": { + "External id": 2939471,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015954704.961, "dur": 0.390, + "args": { + "External id": 2939472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015954706.211, "dur": 18.772, + "args": { + "External id": 2939473,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015954726.501, "dur": 4.423, + "args": { + "External id": 2939474,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954729.718, "dur": 0.918, + "args": { + "External id": 2939475,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015954739.518, "dur": 19.983, + "args": { + "External id": 2939476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015954740.726, "dur": 1.427, + "args": { + "External id": 2939477,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015954743.401, "dur": 15.765, + "args": { + "External id": 2939478,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954745.703, "dur": 2.382, + "args": { + "External id": 2939479,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015954760.679, "dur": 17.607, + "args": { + "External id": 2939480,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954779.719, "dur": 13.808, + "args": { + "External id": 2939481,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015954796.051, "dur": 12.534, + "args": { + "External id": 2939482,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954809.882, "dur": 11.369, + "args": { + "External id": 2939483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015954822.680, "dur": 21.061, + "args": { + "External id": 2939484,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954826.531, "dur": 1.674, + "args": { + "External id": 2939485,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954829.907, "dur": 0.837, + "args": { + "External id": 2939486,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954845.089, "dur": 10.809, + "args": { + "External id": 2939487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015954856.753, "dur": 11.076, + "args": { + "External id": 2939488,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015954875.465, "dur": 1.459, + "args": { + "External id": 2939489,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015954884.274, "dur": 2.866, + "args": { + "External id": 2939490,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015954886.105, "dur": 0.365, + "args": { + "External id": 2939491,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015954945.445, "dur": 85.887, + "args": { + "External id": 2939492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955037.805, "dur": 5.698, + "args": { + "External id": 2939493,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955040.812, "dur": 1.386, + "args": { + "External id": 2939494,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955048.063, "dur": 24.314, + "args": { + "External id": 2939495,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015955077.372, "dur": 8.681, + "args": { + "External id": 2939496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015955079.067, "dur": 6.342, + "args": { + "External id": 2939497,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955082.474, "dur": 2.722, + "args": { + "External id": 2939498,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015955088.916, "dur": 45.229, + "args": { + "External id": 2939499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015955089.999, "dur": 43.609, + "args": { + "External id": 2939500,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955137.907, "dur": 30.337, + "args": { + "External id": 2939501,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955176.022, "dur": 4.321, + "args": { + "External id": 2939502,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955178.462, "dur": 0.883, + "args": { + "External id": 2939503,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015955186.223, "dur": 52.196, + "args": { + "External id": 2939504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015955187.318, "dur": 3.404, + "args": { + "External id": 2939505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015955188.073, "dur": 2.173, + "args": { + "External id": 2939506,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955189.315, "dur": 0.798, + "args": { + "External id": 2939507,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015955191.395, "dur": 46.612, + "args": { + "External id": 2939508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015955192.377, "dur": 45.149, + "args": { + "External id": 2939509,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955243.722, "dur": 3.803, + "args": { + "External id": 2939510,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955245.673, "dur": 0.734, + "args": { + "External id": 2939511,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015955254.052, "dur": 1.542, + "args": { + "External id": 2939512,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015955263.734, "dur": 6.633, + "args": { + "External id": 2939513,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015955265.560, "dur": 4.565, + "args": { + "External id": 2939514,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015955352.667, "dur": 171.036, + "args": { + "External id": 2939515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015955354.567, "dur": 2.326, + "args": { + "External id": 2939516,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015955359.935, "dur": 163.292, + "args": { + "External id": 2939517,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015955361.033, "dur": 0.480, + "args": { + "External id": 2939518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015955362.735, "dur": 21.049, + "args": { + "External id": 2939519,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015955385.266, "dur": 4.588, + "args": { + "External id": 2939520,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955386.976, "dur": 2.446, + "args": { + "External id": 2939521,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015955390.577, "dur": 20.992, + "args": { + "External id": 2939522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015955391.749, "dur": 1.440, + "args": { + "External id": 2939523,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015955394.199, "dur": 17.101, + "args": { + "External id": 2939524,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955398.511, "dur": 2.541, + "args": { + "External id": 2939525,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015955412.893, "dur": 18.916, + "args": { + "External id": 2939526,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955433.375, "dur": 12.522, + "args": { + "External id": 2939527,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015955448.674, "dur": 12.870, + "args": { + "External id": 2939528,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955462.713, "dur": 11.176, + "args": { + "External id": 2939529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015955475.561, "dur": 19.896, + "args": { + "External id": 2939530,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955477.268, "dur": 1.492, + "args": { + "External id": 2939531,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955480.368, "dur": 2.233, + "args": { + "External id": 2939532,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955498.485, "dur": 11.721, + "args": { + "External id": 2939533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955511.080, "dur": 11.143, + "args": { + "External id": 2939534,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015955529.970, "dur": 1.530, + "args": { + "External id": 2939535,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955539.536, "dur": 3.089, + "args": { + "External id": 2939536,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955541.358, "dur": 0.407, + "args": { + "External id": 2939537,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015955598.350, "dur": 48.979, + "args": { + "External id": 2939538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955651.766, "dur": 3.969, + "args": { + "External id": 2939539,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955653.915, "dur": 0.798, + "args": { + "External id": 2939540,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955658.500, "dur": 23.186, + "args": { + "External id": 2939541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015955685.758, "dur": 4.836, + "args": { + "External id": 2939542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015955687.054, "dur": 2.983, + "args": { + "External id": 2939543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955688.544, "dur": 1.323, + "args": { + "External id": 2939544,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015955692.920, "dur": 41.482, + "args": { + "External id": 2939545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015955693.936, "dur": 39.803, + "args": { + "External id": 2939546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955738.253, "dur": 13.169, + "args": { + "External id": 2939547,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015955757.910, "dur": 22.008, + "args": { + "External id": 2939548,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015955760.177, "dur": 19.351, + "args": { + "External id": 2939549,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955764.768, "dur": 1.024, + "args": { + "External id": 2939550,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015955784.428, "dur": 24.142, + "args": { + "External id": 2939551,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015955786.178, "dur": 22.181, + "args": { + "External id": 2939552,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955790.457, "dur": 3.870, + "args": { + "External id": 2939553,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955795.339, "dur": 12.558, + "args": { + "External id": 2939554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015955821.459, "dur": 4.286, + "args": { + "External id": 2939555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015955823.025, "dur": 2.494, + "args": { + "External id": 2939556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015955826.845, "dur": 2.974, + "args": { + "External id": 2939557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555015955828.768, "dur": 0.825, + "args": { + "External id": 2939558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955866.981, "dur": 20.075, + "args": { + "External id": 2939559,"Sequence number": 29294772, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955889.058, "dur": 12.845, + "args": { + "External id": 2939560,"Sequence number": 29294773, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9535 + } + }, + { + "ph": "s", "id": 16, "pid": 1336755, "tid": 1336755, "ts": 1555015955889.058, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955907.132, "dur": 5.064, + "args": { + "External id": 2939561,"Sequence number": 29294774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955909.940, "dur": 1.145, + "args": { + "External id": 2939562,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555015955914.436, "dur": 7.834, + "args": { + "External id": 2939563,"Sequence number": 29294774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955920.125, "dur": 1.008, + "args": { + "External id": 2939564,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955923.489, "dur": 2.840, + "args": { + "External id": 2939565,"Sequence number": 29294774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955925.120, "dur": 0.641, + "args": { + "External id": 2939566,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955930.181, "dur": 6.112, + "args": { + "External id": 2939567,"Sequence number": 29294774, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9542 + } + }, + { + "ph": "s", "id": 15, "pid": 1336755, "tid": 1336755, "ts": 1555015955930.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955933.826, "dur": 0.952, + "args": { + "External id": 2939568,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955937.169, "dur": 5.635, + "args": { + "External id": 2939569,"Sequence number": 29294775, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9544 + } + }, + { + "ph": "s", "id": 14, "pid": 1336755, "tid": 1336755, "ts": 1555015955937.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955941.365, "dur": 0.620, + "args": { + "External id": 2939570,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555015955943.893, "dur": 4.592, + "args": { + "External id": 2939571,"Sequence number": 29294776, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 9546 + } + }, + { + "ph": "s", "id": 13, "pid": 1336755, "tid": 1336755, "ts": 1555015955943.893, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955946.983, "dur": 0.688, + "args": { + "External id": 2939572,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015955949.524, "dur": 4.801, + "args": { + "External id": 2939573,"Sequence number": 29294777, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 9548 + } + }, + { + "ph": "s", "id": 12, "pid": 1336755, "tid": 1336755, "ts": 1555015955949.524, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015955952.440, "dur": 1.129, + "args": { + "External id": 2939574,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015955957.944, "dur": 73.531, + "args": { + "External id": 2939575,"Sequence number": 29294778, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015955959.666, "dur": 71.565, + "args": { + "External id": 2939576,"Sequence number": 29294778, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015955961.886, "dur": 7.271, + "args": { + "External id": 2939577,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 9552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015955965.246, "dur": 3.238, + "args": { + "External id": 2939578,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015955970.070, "dur": 60.289, + "args": { + "External id": 2939579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015956058.345, "dur": 6.614, + "args": { + "External id": 2939580,"Sequence number": 29294778, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 9555 + } + }, + { + "ph": "s", "id": 11, "pid": 1336755, "tid": 1336755, "ts": 1555015956058.345, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015956067.111, "dur": 1.233, + "args": { + "External id": 2939581,"Sequence number": 29294779, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555015956100.175, "dur": 44252.026, + "args": { + "External id": 2939582,"Sequence number": 29294779, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 9557 + } + }, + { + "ph": "s", "id": 10, "pid": 1336755, "tid": 1336755, "ts": 1555015956100.175, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555015956114.330, "dur": 46.105, + "args": { + "External id": 2939583,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555015956116.804, "dur": 43.432, + "args": { + "External id": 2939584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015956118.501, "dur": 6.830, + "args": { + "External id": 2939585,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015956120.144, "dur": 4.701, + "args": { + "External id": 2939586,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956126.422, "dur": 33.110, + "args": { + "External id": 2939587,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015956178.140, "dur": 26.270, + "args": { + "External id": 2939588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015956179.131, "dur": 6.453, + "args": { + "External id": 2939589,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015956181.158, "dur": 4.102, + "args": { + "External id": 2939590,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956186.919, "dur": 17.269, + "args": { + "External id": 2939591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956188.620, "dur": 15.210, + "args": { + "External id": 2939592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015956207.907, "dur": 19.198, + "args": { + "External id": 2939593,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555015956208.818, "dur": 4.538, + "args": { + "External id": 2939594,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015956210.304, "dur": 2.785, + "args": { + "External id": 2939595,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956215.401, "dur": 11.519, + "args": { + "External id": 2939596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956215.942, "dur": 10.615, + "args": { + "External id": 2939597,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555015956232.718, "dur": 17.612, + "args": { + "External id": 2939598,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015956236.171, "dur": 2.800, + "args": { + "External id": 2939599,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956239.622, "dur": 10.458, + "args": { + "External id": 2939600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956240.343, "dur": 9.459, + "args": { + "External id": 2939601,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1336755, + "ts": 1555015956254.779, "dur": 22.343, + "args": { + "External id": 2939602,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015956279.584, "dur": 48.817, + "args": { + "External id": 2939603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015956283.518, "dur": 44.451, + "args": { + "External id": 2939604,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015956288.566, "dur": 0.900, + "args": { + "External id": 2939605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555015956290.555, "dur": 22.788, + "args": { + "External id": 2939606,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555015956292.472, "dur": 20.672, + "args": { + "External id": 2939607,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015956296.704, "dur": 2.931, + "args": { + "External id": 2939608,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015956300.514, "dur": 12.150, + "args": { + "External id": 2939609,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1336755, + "ts": 1555015956332.195, "dur": 38549.035, + "args": { + "External id": 2939610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1336755, + "ts": 1555015956333.707, "dur": 38546.115, + "args": { + "External id": 2939611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015994892.039, "dur": 7.006, + "args": { + "External id": 2939612,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015994896.251, "dur": 1.253, + "args": { + "External id": 2939613,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015994905.581, "dur": 131.143, + "args": { + "External id": 2939614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015994907.122, "dur": 5.810, + "args": { + "External id": 2939615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015994909.194, "dur": 2.858, + "args": { + "External id": 2939616,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015994910.730, "dur": 1.060, + "args": { + "External id": 2939617,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015994914.065, "dur": 121.987, + "args": { + "External id": 2939618,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015994915.708, "dur": 119.231, + "args": { + "External id": 2939619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015995041.942, "dur": 5.353, + "args": { + "External id": 2939620,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995044.765, "dur": 0.994, + "args": { + "External id": 2939621,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015995056.262, "dur": 2.809, + "args": { + "External id": 2939622,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995068.332, "dur": 6.377, + "args": { + "External id": 2939623,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995070.030, "dur": 4.418, + "args": { + "External id": 2939624,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015995218.730, "dur": 196.499, + "args": { + "External id": 2939625,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995221.510, "dur": 3.050, + "args": { + "External id": 2939626,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015995228.183, "dur": 186.533, + "args": { + "External id": 2939627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015995231.878, "dur": 0.564, + "args": { + "External id": 2939628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015995233.774, "dur": 25.515, + "args": { + "External id": 2939629,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015995261.198, "dur": 3.937, + "args": { + "External id": 2939630,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995263.685, "dur": 0.957, + "args": { + "External id": 2939631,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015995266.374, "dur": 24.354, + "args": { + "External id": 2939632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995269.280, "dur": 1.594, + "args": { + "External id": 2939633,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015995272.095, "dur": 18.360, + "args": { + "External id": 2939634,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995275.667, "dur": 3.106, + "args": { + "External id": 2939635,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015995294.058, "dur": 21.491, + "args": { + "External id": 2939636,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995317.502, "dur": 14.062, + "args": { + "External id": 2939637,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015995334.861, "dur": 14.479, + "args": { + "External id": 2939638,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995350.827, "dur": 12.842, + "args": { + "External id": 2939639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015995365.536, "dur": 19.910, + "args": { + "External id": 2939640,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995367.951, "dur": 1.946, + "args": { + "External id": 2939641,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995371.627, "dur": 0.869, + "args": { + "External id": 2939642,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995388.656, "dur": 12.287, + "args": { + "External id": 2939643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995402.300, "dur": 11.283, + "args": { + "External id": 2939644,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015995422.085, "dur": 2.247, + "args": { + "External id": 2939645,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015995431.435, "dur": 4.059, + "args": { + "External id": 2939646,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995433.733, "dur": 0.904, + "args": { + "External id": 2939647,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015995510.011, "dur": 69.772, + "args": { + "External id": 2939648,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015995585.377, "dur": 7.427, + "args": { + "External id": 2939649,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995587.928, "dur": 2.473, + "args": { + "External id": 2939650,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995596.056, "dur": 26.634, + "args": { + "External id": 2939651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015995627.836, "dur": 5.455, + "args": { + "External id": 2939652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015995629.579, "dur": 2.969, + "args": { + "External id": 2939653,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995631.373, "dur": 0.887, + "args": { + "External id": 2939654,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015995636.454, "dur": 46.069, + "args": { + "External id": 2939655,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015995637.786, "dur": 43.857, + "args": { + "External id": 2939656,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995686.209, "dur": 14.666, + "args": { + "External id": 2939657,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015995708.307, "dur": 3.780, + "args": { + "External id": 2939658,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995710.469, "dur": 0.640, + "args": { + "External id": 2939659,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015995715.928, "dur": 50.217, + "args": { + "External id": 2939660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015995716.951, "dur": 3.476, + "args": { + "External id": 2939661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015995717.645, "dur": 2.164, + "args": { + "External id": 2939662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995718.957, "dur": 0.719, + "args": { + "External id": 2939663,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015995722.651, "dur": 43.157, + "args": { + "External id": 2939664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015995723.325, "dur": 41.790, + "args": { + "External id": 2939665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015995770.103, "dur": 3.568, + "args": { + "External id": 2939666,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995771.934, "dur": 0.735, + "args": { + "External id": 2939667,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015995779.233, "dur": 1.589, + "args": { + "External id": 2939668,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995788.257, "dur": 7.624, + "args": { + "External id": 2939669,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995792.075, "dur": 3.539, + "args": { + "External id": 2939670,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015995885.673, "dur": 214.285, + "args": { + "External id": 2939671,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995887.364, "dur": 3.456, + "args": { + "External id": 2939672,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015995892.524, "dur": 206.771, + "args": { + "External id": 2939673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015995893.769, "dur": 0.408, + "args": { + "External id": 2939674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015995895.198, "dur": 18.771, + "args": { + "External id": 2939675,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015995915.974, "dur": 4.844, + "args": { + "External id": 2939676,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015995919.987, "dur": 0.546, + "args": { + "External id": 2939677,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015995923.295, "dur": 19.801, + "args": { + "External id": 2939678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015995924.255, "dur": 1.293, + "args": { + "External id": 2939679,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015995926.614, "dur": 16.215, + "args": { + "External id": 2939680,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995928.897, "dur": 2.562, + "args": { + "External id": 2939681,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015995944.534, "dur": 19.353, + "args": { + "External id": 2939682,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015995965.375, "dur": 12.177, + "args": { + "External id": 2939683,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015995980.208, "dur": 51.666, + "args": { + "External id": 2939684,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996034.233, "dur": 12.011, + "args": { + "External id": 2939685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015996048.063, "dur": 24.381, + "args": { + "External id": 2939686,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996051.824, "dur": 1.810, + "args": { + "External id": 2939687,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996056.163, "dur": 2.510, + "args": { + "External id": 2939688,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996074.124, "dur": 11.710, + "args": { + "External id": 2939689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996086.920, "dur": 10.831, + "args": { + "External id": 2939690,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015996108.217, "dur": 2.247, + "args": { + "External id": 2939691,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015996120.004, "dur": 3.819, + "args": { + "External id": 2939692,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996122.490, "dur": 0.469, + "args": { + "External id": 2939693,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015996210.510, "dur": 58.311, + "args": { + "External id": 2939694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015996276.287, "dur": 5.514, + "args": { + "External id": 2939695,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996279.307, "dur": 0.945, + "args": { + "External id": 2939696,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996283.298, "dur": 25.755, + "args": { + "External id": 2939697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015996313.798, "dur": 5.817, + "args": { + "External id": 2939698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015996315.133, "dur": 3.822, + "args": { + "External id": 2939699,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996316.964, "dur": 1.709, + "args": { + "External id": 2939700,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015996324.003, "dur": 41.617, + "args": { + "External id": 2939701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015996325.014, "dur": 39.967, + "args": { + "External id": 2939702,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996369.450, "dur": 14.184, + "args": { + "External id": 2939703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015996388.994, "dur": 3.447, + "args": { + "External id": 2939704,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996390.903, "dur": 0.631, + "args": { + "External id": 2939705,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015996396.444, "dur": 50.633, + "args": { + "External id": 2939706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015996397.420, "dur": 5.820, + "args": { + "External id": 2939707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015996400.126, "dur": 2.590, + "args": { + "External id": 2939708,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996401.706, "dur": 0.852, + "args": { + "External id": 2939709,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015996403.873, "dur": 42.742, + "args": { + "External id": 2939710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015996404.523, "dur": 41.528, + "args": { + "External id": 2939711,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015996466.878, "dur": 3.787, + "args": { + "External id": 2939712,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996468.839, "dur": 0.779, + "args": { + "External id": 2939713,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015996476.578, "dur": 1.511, + "args": { + "External id": 2939714,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015996486.741, "dur": 7.880, + "args": { + "External id": 2939715,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015996488.774, "dur": 5.426, + "args": { + "External id": 2939716,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015996577.080, "dur": 175.343, + "args": { + "External id": 2939717,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015996579.080, "dur": 2.155, + "args": { + "External id": 2939718,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015996582.504, "dur": 169.371, + "args": { + "External id": 2939719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015996583.744, "dur": 0.479, + "args": { + "External id": 2939720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015996585.484, "dur": 21.799, + "args": { + "External id": 2939721,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015996608.850, "dur": 3.385, + "args": { + "External id": 2939722,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996610.836, "dur": 1.126, + "args": { + "External id": 2939723,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015996614.999, "dur": 26.520, + "args": { + "External id": 2939724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015996615.785, "dur": 1.515, + "args": { + "External id": 2939725,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015996618.344, "dur": 22.894, + "args": { + "External id": 2939726,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996622.474, "dur": 2.363, + "args": { + "External id": 2939727,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015996642.932, "dur": 19.612, + "args": { + "External id": 2939728,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996663.948, "dur": 11.782, + "args": { + "External id": 2939729,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015996678.157, "dur": 13.111, + "args": { + "External id": 2939730,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996692.555, "dur": 11.540, + "args": { + "External id": 2939731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015996705.937, "dur": 20.202, + "args": { + "External id": 2939732,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996709.363, "dur": 1.514, + "args": { + "External id": 2939733,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996712.875, "dur": 0.777, + "args": { + "External id": 2939734,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996727.492, "dur": 11.367, + "args": { + "External id": 2939735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996739.588, "dur": 11.357, + "args": { + "External id": 2939736,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015996758.535, "dur": 1.452, + "args": { + "External id": 2939737,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015996767.278, "dur": 3.229, + "args": { + "External id": 2939738,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996769.410, "dur": 0.444, + "args": { + "External id": 2939739,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015996831.691, "dur": 50.171, + "args": { + "External id": 2939740,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015996887.002, "dur": 4.573, + "args": { + "External id": 2939741,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996889.045, "dur": 1.584, + "args": { + "External id": 2939742,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015996893.035, "dur": 24.254, + "args": { + "External id": 2939743,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015996921.564, "dur": 6.328, + "args": { + "External id": 2939744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015996922.836, "dur": 4.406, + "args": { + "External id": 2939745,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015996926.116, "dur": 0.877, + "args": { + "External id": 2939746,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015996933.709, "dur": 86.160, + "args": { + "External id": 2939747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015996934.620, "dur": 83.981, + "args": { + "External id": 2939748,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997025.752, "dur": 35.342, + "args": { + "External id": 2939749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015997067.559, "dur": 4.755, + "args": { + "External id": 2939750,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997070.137, "dur": 1.128, + "args": { + "External id": 2939751,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015997076.232, "dur": 82.854, + "args": { + "External id": 2939752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015997078.940, "dur": 5.532, + "args": { + "External id": 2939753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015997079.844, "dur": 4.092, + "args": { + "External id": 2939754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997081.300, "dur": 2.418, + "args": { + "External id": 2939755,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015997085.142, "dur": 73.254, + "args": { + "External id": 2939756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015997085.701, "dur": 71.729, + "args": { + "External id": 2939757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015997164.466, "dur": 4.349, + "args": { + "External id": 2939758,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997166.795, "dur": 0.787, + "args": { + "External id": 2939759,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015997177.124, "dur": 1.546, + "args": { + "External id": 2939760,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997186.382, "dur": 6.464, + "args": { + "External id": 2939761,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997188.241, "dur": 4.345, + "args": { + "External id": 2939762,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015997280.363, "dur": 174.958, + "args": { + "External id": 2939763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997282.841, "dur": 2.211, + "args": { + "External id": 2939764,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015997287.777, "dur": 166.931, + "args": { + "External id": 2939765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015997289.215, "dur": 0.529, + "args": { + "External id": 2939766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015997292.922, "dur": 21.203, + "args": { + "External id": 2939767,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015997315.726, "dur": 3.164, + "args": { + "External id": 2939768,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997317.781, "dur": 0.859, + "args": { + "External id": 2939769,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015997319.788, "dur": 21.317, + "args": { + "External id": 2939770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997320.613, "dur": 2.688, + "args": { + "External id": 2939771,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015997324.544, "dur": 16.217, + "args": { + "External id": 2939772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997327.117, "dur": 2.707, + "args": { + "External id": 2939773,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015997344.328, "dur": 19.773, + "args": { + "External id": 2939774,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997365.526, "dur": 12.588, + "args": { + "External id": 2939775,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015997381.082, "dur": 12.063, + "args": { + "External id": 2939776,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997394.478, "dur": 11.594, + "args": { + "External id": 2939777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015997407.689, "dur": 18.577, + "args": { + "External id": 2939778,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997409.621, "dur": 1.466, + "args": { + "External id": 2939779,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997413.267, "dur": 0.916, + "args": { + "External id": 2939780,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997429.525, "dur": 11.648, + "args": { + "External id": 2939781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997442.275, "dur": 11.082, + "args": { + "External id": 2939782,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015997461.517, "dur": 1.501, + "args": { + "External id": 2939783,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015997471.032, "dur": 3.385, + "args": { + "External id": 2939784,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997473.266, "dur": 0.465, + "args": { + "External id": 2939785,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015997536.160, "dur": 52.023, + "args": { + "External id": 2939786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015997593.153, "dur": 4.269, + "args": { + "External id": 2939787,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997595.485, "dur": 1.038, + "args": { + "External id": 2939788,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997598.850, "dur": 23.059, + "args": { + "External id": 2939789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015997627.771, "dur": 4.761, + "args": { + "External id": 2939790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015997629.134, "dur": 2.838, + "args": { + "External id": 2939791,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997630.797, "dur": 0.986, + "args": { + "External id": 2939792,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015997634.899, "dur": 39.997, + "args": { + "External id": 2939793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015997636.063, "dur": 38.154, + "args": { + "External id": 2939794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997678.451, "dur": 13.935, + "args": { + "External id": 2939795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015997697.927, "dur": 5.076, + "args": { + "External id": 2939796,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997701.501, "dur": 0.753, + "args": { + "External id": 2939797,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015997706.407, "dur": 50.093, + "args": { + "External id": 2939798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015997707.243, "dur": 5.206, + "args": { + "External id": 2939799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015997707.941, "dur": 3.872, + "args": { + "External id": 2939800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997710.948, "dur": 0.732, + "args": { + "External id": 2939801,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015997713.030, "dur": 43.074, + "args": { + "External id": 2939802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015997715.364, "dur": 40.108, + "args": { + "External id": 2939803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015997765.963, "dur": 3.874, + "args": { + "External id": 2939804,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997767.849, "dur": 0.849, + "args": { + "External id": 2939805,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015997775.169, "dur": 1.355, + "args": { + "External id": 2939806,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997783.166, "dur": 7.167, + "args": { + "External id": 2939807,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997786.819, "dur": 3.289, + "args": { + "External id": 2939808,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015997871.596, "dur": 212.517, + "args": { + "External id": 2939809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997875.190, "dur": 2.124, + "args": { + "External id": 2939810,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015997878.688, "dur": 204.861, + "args": { + "External id": 2939811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015997879.704, "dur": 0.368, + "args": { + "External id": 2939812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015997881.075, "dur": 18.770, + "args": { + "External id": 2939813,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015997901.378, "dur": 4.871, + "args": { + "External id": 2939814,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015997905.133, "dur": 0.925, + "args": { + "External id": 2939815,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015997907.065, "dur": 21.256, + "args": { + "External id": 2939816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015997908.158, "dur": 1.631, + "args": { + "External id": 2939817,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015997912.421, "dur": 15.623, + "args": { + "External id": 2939818,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997914.657, "dur": 2.344, + "args": { + "External id": 2939819,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015997929.686, "dur": 19.078, + "args": { + "External id": 2939820,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997950.025, "dur": 13.163, + "args": { + "External id": 2939821,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015997965.861, "dur": 13.231, + "args": { + "External id": 2939822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015997980.276, "dur": 51.086, + "args": { + "External id": 2939823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015998034.635, "dur": 22.808, + "args": { + "External id": 2939824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998036.449, "dur": 2.287, + "args": { + "External id": 2939825,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998042.307, "dur": 0.830, + "args": { + "External id": 2939826,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998058.780, "dur": 11.931, + "args": { + "External id": 2939827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998071.627, "dur": 10.813, + "args": { + "External id": 2939828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015998091.505, "dur": 1.947, + "args": { + "External id": 2939829,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998101.695, "dur": 3.356, + "args": { + "External id": 2939830,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998103.890, "dur": 0.359, + "args": { + "External id": 2939831,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015998190.252, "dur": 56.985, + "args": { + "External id": 2939832,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998252.819, "dur": 7.143, + "args": { + "External id": 2939833,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998257.436, "dur": 1.018, + "args": { + "External id": 2939834,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998261.323, "dur": 25.752, + "args": { + "External id": 2939835,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015998291.804, "dur": 5.562, + "args": { + "External id": 2939836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015998293.127, "dur": 3.681, + "args": { + "External id": 2939837,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998295.056, "dur": 1.543, + "args": { + "External id": 2939838,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015998299.773, "dur": 42.816, + "args": { + "External id": 2939839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015998302.329, "dur": 39.724, + "args": { + "External id": 2939840,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998346.135, "dur": 15.084, + "args": { + "External id": 2939841,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998366.713, "dur": 3.607, + "args": { + "External id": 2939842,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998368.366, "dur": 1.082, + "args": { + "External id": 2939843,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015998374.002, "dur": 56.317, + "args": { + "External id": 2939844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015998382.553, "dur": 5.312, + "args": { + "External id": 2939845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015998385.211, "dur": 2.152, + "args": { + "External id": 2939846,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998386.539, "dur": 0.604, + "args": { + "External id": 2939847,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015998388.588, "dur": 41.388, + "args": { + "External id": 2939848,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015998389.157, "dur": 40.223, + "args": { + "External id": 2939849,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998434.052, "dur": 3.605, + "args": { + "External id": 2939850,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998435.829, "dur": 0.911, + "args": { + "External id": 2939851,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015998444.916, "dur": 1.514, + "args": { + "External id": 2939852,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015998453.990, "dur": 7.219, + "args": { + "External id": 2939853,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015998455.840, "dur": 5.091, + "args": { + "External id": 2939854,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015998541.969, "dur": 178.687, + "args": { + "External id": 2939855,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015998544.327, "dur": 2.008, + "args": { + "External id": 2939856,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015998547.551, "dur": 172.685, + "args": { + "External id": 2939857,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015998550.431, "dur": 0.541, + "args": { + "External id": 2939858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015998552.177, "dur": 21.289, + "args": { + "External id": 2939859,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015998575.129, "dur": 3.289, + "args": { + "External id": 2939860,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998577.158, "dur": 0.947, + "args": { + "External id": 2939861,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015998579.195, "dur": 21.456, + "args": { + "External id": 2939862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015998580.089, "dur": 1.532, + "args": { + "External id": 2939863,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015998582.708, "dur": 17.595, + "args": { + "External id": 2939864,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998587.351, "dur": 2.442, + "args": { + "External id": 2939865,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015998601.877, "dur": 20.072, + "args": { + "External id": 2939866,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998623.309, "dur": 12.514, + "args": { + "External id": 2939867,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015998640.672, "dur": 16.835, + "args": { + "External id": 2939868,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998658.964, "dur": 13.292, + "args": { + "External id": 2939869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015998673.657, "dur": 19.114, + "args": { + "External id": 2939870,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998675.428, "dur": 1.443, + "args": { + "External id": 2939871,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998678.959, "dur": 0.867, + "args": { + "External id": 2939872,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998694.322, "dur": 12.060, + "args": { + "External id": 2939873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998708.975, "dur": 9.972, + "args": { + "External id": 2939874,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015998726.547, "dur": 1.535, + "args": { + "External id": 2939875,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998736.144, "dur": 3.275, + "args": { + "External id": 2939876,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998738.259, "dur": 0.476, + "args": { + "External id": 2939877,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015998798.766, "dur": 46.036, + "args": { + "External id": 2939878,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998849.169, "dur": 4.528, + "args": { + "External id": 2939879,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998851.589, "dur": 1.085, + "args": { + "External id": 2939880,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998854.786, "dur": 20.931, + "args": { + "External id": 2939881,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015998879.595, "dur": 6.059, + "args": { + "External id": 2939882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015998882.413, "dur": 2.645, + "args": { + "External id": 2939883,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998883.981, "dur": 0.872, + "args": { + "External id": 2939884,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015998887.670, "dur": 38.461, + "args": { + "External id": 2939885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015998888.614, "dur": 36.897, + "args": { + "External id": 2939886,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015998929.400, "dur": 13.009, + "args": { + "External id": 2939887,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015998947.338, "dur": 4.774, + "args": { + "External id": 2939888,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998950.616, "dur": 0.696, + "args": { + "External id": 2939889,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015998954.957, "dur": 87.367, + "args": { + "External id": 2939890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015998955.715, "dur": 5.721, + "args": { + "External id": 2939891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015998956.537, "dur": 4.383, + "args": { + "External id": 2939892,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015998958.077, "dur": 2.634, + "args": { + "External id": 2939893,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015998962.043, "dur": 79.753, + "args": { + "External id": 2939894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015998962.535, "dur": 78.046, + "args": { + "External id": 2939895,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015999048.382, "dur": 4.417, + "args": { + "External id": 2939896,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999050.887, "dur": 0.792, + "args": { + "External id": 2939897,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015999060.152, "dur": 1.336, + "args": { + "External id": 2939898,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999068.902, "dur": 5.725, + "args": { + "External id": 2939899,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999070.669, "dur": 3.683, + "args": { + "External id": 2939900,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015999171.321, "dur": 207.546, + "args": { + "External id": 2939901,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999174.071, "dur": 3.113, + "args": { + "External id": 2939902,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015999180.316, "dur": 197.952, + "args": { + "External id": 2939903,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015999181.589, "dur": 0.385, + "args": { + "External id": 2939904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015999210.863, "dur": 23.170, + "args": { + "External id": 2939905,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015999235.553, "dur": 3.047, + "args": { + "External id": 2939906,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999237.432, "dur": 0.868, + "args": { + "External id": 2939907,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015999239.381, "dur": 27.941, + "args": { + "External id": 2939908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999240.122, "dur": 8.910, + "args": { + "External id": 2939909,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015999250.173, "dur": 16.771, + "args": { + "External id": 2939910,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999253.006, "dur": 2.673, + "args": { + "External id": 2939911,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015999268.710, "dur": 19.540, + "args": { + "External id": 2939912,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999289.586, "dur": 12.071, + "args": { + "External id": 2939913,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015999305.939, "dur": 13.255, + "args": { + "External id": 2939914,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999320.383, "dur": 11.823, + "args": { + "External id": 2939915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015999333.756, "dur": 18.000, + "args": { + "External id": 2939916,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999335.140, "dur": 1.346, + "args": { + "External id": 2939917,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999338.460, "dur": 0.658, + "args": { + "External id": 2939918,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999353.075, "dur": 11.292, + "args": { + "External id": 2939919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999367.193, "dur": 10.046, + "args": { + "External id": 2939920,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015999385.466, "dur": 1.616, + "args": { + "External id": 2939921,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015999394.888, "dur": 3.411, + "args": { + "External id": 2939922,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999397.187, "dur": 0.334, + "args": { + "External id": 2939923,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015999460.820, "dur": 55.592, + "args": { + "External id": 2939924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015999521.199, "dur": 4.261, + "args": { + "External id": 2939925,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999523.497, "dur": 0.913, + "args": { + "External id": 2939926,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999526.743, "dur": 23.401, + "args": { + "External id": 2939927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015999555.991, "dur": 5.931, + "args": { + "External id": 2939928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015999557.273, "dur": 3.961, + "args": { + "External id": 2939929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999558.827, "dur": 2.183, + "args": { + "External id": 2939930,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015999564.374, "dur": 39.926, + "args": { + "External id": 2939931,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015999565.361, "dur": 38.148, + "args": { + "External id": 2939932,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999608.120, "dur": 13.725, + "args": { + "External id": 2939933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015999627.056, "dur": 4.866, + "args": { + "External id": 2939934,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999630.482, "dur": 0.664, + "args": { + "External id": 2939935,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555015999635.362, "dur": 46.461, + "args": { + "External id": 2939936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555015999636.038, "dur": 3.095, + "args": { + "External id": 2939937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555015999636.613, "dur": 2.032, + "args": { + "External id": 2939938,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999637.875, "dur": 0.624, + "args": { + "External id": 2939939,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555015999639.639, "dur": 41.804, + "args": { + "External id": 2939940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555015999641.923, "dur": 38.960, + "args": { + "External id": 2939941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015999685.686, "dur": 3.270, + "args": { + "External id": 2939942,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999687.241, "dur": 0.779, + "args": { + "External id": 2939943,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015999693.639, "dur": 1.199, + "args": { + "External id": 2939944,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999701.329, "dur": 7.022, + "args": { + "External id": 2939945,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999704.445, "dur": 3.606, + "args": { + "External id": 2939946,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015999780.847, "dur": 162.917, + "args": { + "External id": 2939947,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999782.717, "dur": 2.087, + "args": { + "External id": 2939948,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555015999787.548, "dur": 155.718, + "args": { + "External id": 2939949,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555015999788.498, "dur": 0.362, + "args": { + "External id": 2939950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555015999790.095, "dur": 17.920, + "args": { + "External id": 2939951,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555015999809.345, "dur": 3.782, + "args": { + "External id": 2939952,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999811.028, "dur": 1.896, + "args": { + "External id": 2939953,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015999813.910, "dur": 23.663, + "args": { + "External id": 2939954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555015999819.778, "dur": 1.593, + "args": { + "External id": 2939955,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555015999822.430, "dur": 14.898, + "args": { + "External id": 2939956,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999824.479, "dur": 1.903, + "args": { + "External id": 2939957,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555015999838.964, "dur": 17.453, + "args": { + "External id": 2939958,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999857.508, "dur": 13.139, + "args": { + "External id": 2939959,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555015999873.188, "dur": 11.966, + "args": { + "External id": 2939960,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999886.116, "dur": 11.120, + "args": { + "External id": 2939961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555015999898.791, "dur": 18.906, + "args": { + "External id": 2939962,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999900.157, "dur": 1.322, + "args": { + "External id": 2939963,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999904.786, "dur": 0.861, + "args": { + "External id": 2939964,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999918.943, "dur": 10.907, + "args": { + "External id": 2939965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555015999930.665, "dur": 11.530, + "args": { + "External id": 2939966,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555015999949.024, "dur": 1.436, + "args": { + "External id": 2939967,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555015999957.205, "dur": 3.320, + "args": { + "External id": 2939968,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555015999959.455, "dur": 0.280, + "args": { + "External id": 2939969,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016000060.110, "dur": 54.078, + "args": { + "External id": 2939970,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016000119.398, "dur": 6.458, + "args": { + "External id": 2939971,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000123.652, "dur": 0.895, + "args": { + "External id": 2939972,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000127.153, "dur": 40.788, + "args": { + "External id": 2939973,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016000174.221, "dur": 5.436, + "args": { + "External id": 2939974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016000175.565, "dur": 3.351, + "args": { + "External id": 2939975,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000177.475, "dur": 1.222, + "args": { + "External id": 2939976,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016000182.430, "dur": 45.255, + "args": { + "External id": 2939977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016000185.143, "dur": 42.039, + "args": { + "External id": 2939978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000231.469, "dur": 14.878, + "args": { + "External id": 2939979,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016000250.990, "dur": 26.454, + "args": { + "External id": 2939980,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016000253.172, "dur": 23.822, + "args": { + "External id": 2939981,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000258.150, "dur": 1.877, + "args": { + "External id": 2939982,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555016000282.554, "dur": 27.188, + "args": { + "External id": 2939983,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555016000284.364, "dur": 25.185, + "args": { + "External id": 2939984,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000288.601, "dur": 4.059, + "args": { + "External id": 2939985,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000295.444, "dur": 13.609, + "args": { + "External id": 2939986,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016000321.542, "dur": 4.470, + "args": { + "External id": 2939987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016000323.258, "dur": 2.526, + "args": { + "External id": 2939988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016000327.159, "dur": 1.225, + "args": { + "External id": 2939989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016000327.772, "dur": 0.545, + "args": { + "External id": 2939990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000368.914, "dur": 20.252, + "args": { + "External id": 2939991,"Sequence number": 29294780, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000391.041, "dur": 14.238, + "args": { + "External id": 2939992,"Sequence number": 29294781, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "s", "id": 9, "pid": 1336755, "tid": 1336755, "ts": 1555016000391.041, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016000411.469, "dur": 5.634, + "args": { + "External id": 2939993,"Sequence number": 29294782, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000414.710, "dur": 1.007, + "args": { + "External id": 2939994,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555016000424.716, "dur": 5.995, + "args": { + "External id": 2939995,"Sequence number": 29294782, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000428.583, "dur": 0.919, + "args": { + "External id": 2939996,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016000431.998, "dur": 2.862, + "args": { + "External id": 2939997,"Sequence number": 29294782, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000433.400, "dur": 0.886, + "args": { + "External id": 2939998,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016000440.170, "dur": 5.495, + "args": { + "External id": 2939999,"Sequence number": 29294782, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9974 + } + }, + { + "ph": "s", "id": 8, "pid": 1336755, "tid": 1336755, "ts": 1555016000440.170, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000443.353, "dur": 0.863, + "args": { + "External id": 2940000,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016000446.761, "dur": 4.349, + "args": { + "External id": 2940001,"Sequence number": 29294783, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9976 + } + }, + { + "ph": "s", "id": 7, "pid": 1336755, "tid": 1336755, "ts": 1555016000446.761, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000449.766, "dur": 0.535, + "args": { + "External id": 2940002,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336755, "tid": 1336755, + "ts": 1555016000452.085, "dur": 6.763, + "args": { + "External id": 2940003,"Sequence number": 29294784, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 9978 + } + }, + { + "ph": "s", "id": 6, "pid": 1336755, "tid": 1336755, "ts": 1555016000452.085, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000456.861, "dur": 1.060, + "args": { + "External id": 2940004,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016000459.809, "dur": 5.339, + "args": { + "External id": 2940005,"Sequence number": 29294785, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 9980 + } + }, + { + "ph": "s", "id": 5, "pid": 1336755, "tid": 1336755, "ts": 1555016000459.809, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000462.209, "dur": 2.180, + "args": { + "External id": 2940006,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 9981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555016000468.686, "dur": 27.545, + "args": { + "External id": 2940007,"Sequence number": 29294786, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555016000470.121, "dur": 25.912, + "args": { + "External id": 2940008,"Sequence number": 29294786, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016000472.501, "dur": 6.162, + "args": { + "External id": 2940009,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016000474.840, "dur": 3.297, + "args": { + "External id": 2940010,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000479.601, "dur": 15.967, + "args": { + "External id": 2940011,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016000519.050, "dur": 4.840, + "args": { + "External id": 2940012,"Sequence number": 29294786, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 9987 + } + }, + { + "ph": "s", "id": 4, "pid": 1336755, "tid": 1336755, "ts": 1555016000519.050, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016000526.165, "dur": 1.335, + "args": { + "External id": 2940013,"Sequence number": 29294787, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336755, "tid": 1336755, + "ts": 1555016000558.615, "dur": 44115.223, + "args": { + "External id": 2940014,"Sequence number": 29294787, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "s", "id": 3, "pid": 1336755, "tid": 1336755, "ts": 1555016000558.615, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336755, "tid": 1336755, + "ts": 1555016000572.136, "dur": 26.949, + "args": { + "External id": 2940015,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555016000572.593, "dur": 26.226, + "args": { + "External id": 2940016,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016000573.687, "dur": 6.429, + "args": { + "External id": 2940017,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016000575.283, "dur": 4.417, + "args": { + "External id": 2940018,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000580.766, "dur": 17.661, + "args": { + "External id": 2940019,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016000614.813, "dur": 25.368, + "args": { + "External id": 2940020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016000616.069, "dur": 7.320, + "args": { + "External id": 2940021,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000619.209, "dur": 3.857, + "args": { + "External id": 2940022,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000624.612, "dur": 15.344, + "args": { + "External id": 2940023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000626.285, "dur": 13.305, + "args": { + "External id": 2940024,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016000643.824, "dur": 17.749, + "args": { + "External id": 2940025,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016000644.436, "dur": 4.143, + "args": { + "External id": 2940026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000645.668, "dur": 2.688, + "args": { + "External id": 2940027,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000649.061, "dur": 12.330, + "args": { + "External id": 2940028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000649.702, "dur": 11.332, + "args": { + "External id": 2940029,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555016000667.040, "dur": 18.127, + "args": { + "External id": 2940030,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016000670.306, "dur": 3.127, + "args": { + "External id": 2940031,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000674.006, "dur": 10.896, + "args": { + "External id": 2940032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000674.582, "dur": 10.021, + "args": { + "External id": 2940033,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1336755, + "ts": 1555016000689.467, "dur": 19.768, + "args": { + "External id": 2940034,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016000711.572, "dur": 42.184, + "args": { + "External id": 2940035,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016000713.313, "dur": 40.065, + "args": { + "External id": 2940036,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000718.907, "dur": 1.042, + "args": { + "External id": 2940037,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555016000720.919, "dur": 19.261, + "args": { + "External id": 2940038,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555016000722.318, "dur": 17.666, + "args": { + "External id": 2940039,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016000724.132, "dur": 2.552, + "args": { + "External id": 2940040,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016000727.739, "dur": 11.858, + "args": { + "External id": 2940041,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1336755, + "ts": 1555016000758.993, "dur": 38491.927, + "args": { + "External id": 2940042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1336755, + "ts": 1555016000760.630, "dur": 38489.356, + "args": { + "External id": 2940043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016039260.441, "dur": 5.512, + "args": { + "External id": 2940044,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039263.466, "dur": 1.000, + "args": { + "External id": 2940045,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016039270.873, "dur": 101.304, + "args": { + "External id": 2940046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016039272.559, "dur": 7.216, + "args": { + "External id": 2940047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016039274.321, "dur": 4.648, + "args": { + "External id": 2940048,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039277.466, "dur": 1.239, + "args": { + "External id": 2940049,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016039280.752, "dur": 90.814, + "args": { + "External id": 2940050,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016039282.498, "dur": 88.088, + "args": { + "External id": 2940051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016039375.979, "dur": 3.674, + "args": { + "External id": 2940052,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039378.033, "dur": 0.615, + "args": { + "External id": 2940053,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016039386.475, "dur": 2.246, + "args": { + "External id": 2940054,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016039399.200, "dur": 6.411, + "args": { + "External id": 2940055,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016039401.255, "dur": 4.086, + "args": { + "External id": 2940056,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016039530.780, "dur": 189.921, + "args": { + "External id": 2940057,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016039534.584, "dur": 2.545, + "args": { + "External id": 2940058,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016039538.656, "dur": 181.363, + "args": { + "External id": 2940059,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016039540.331, "dur": 0.675, + "args": { + "External id": 2940060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016039542.420, "dur": 24.356, + "args": { + "External id": 2940061,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016039570.215, "dur": 4.822, + "args": { + "External id": 2940062,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039573.758, "dur": 0.958, + "args": { + "External id": 2940063,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016039576.086, "dur": 22.630, + "args": { + "External id": 2940064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016039576.933, "dur": 1.348, + "args": { + "External id": 2940065,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016039579.854, "dur": 18.590, + "args": { + "External id": 2940066,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039583.539, "dur": 3.178, + "args": { + "External id": 2940067,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016039600.257, "dur": 22.188, + "args": { + "External id": 2940068,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039624.405, "dur": 14.700, + "args": { + "External id": 2940069,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016039642.026, "dur": 14.526, + "args": { + "External id": 2940070,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039658.025, "dur": 12.346, + "args": { + "External id": 2940071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016039674.180, "dur": 20.345, + "args": { + "External id": 2940072,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039676.183, "dur": 1.672, + "args": { + "External id": 2940073,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039680.062, "dur": 0.857, + "args": { + "External id": 2940074,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039695.892, "dur": 10.989, + "args": { + "External id": 2940075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039708.150, "dur": 10.920, + "args": { + "External id": 2940076,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016039727.108, "dur": 1.949, + "args": { + "External id": 2940077,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016039736.896, "dur": 3.879, + "args": { + "External id": 2940078,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039739.273, "dur": 0.631, + "args": { + "External id": 2940079,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016039812.998, "dur": 63.977, + "args": { + "External id": 2940080,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016039882.336, "dur": 4.950, + "args": { + "External id": 2940081,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039884.313, "dur": 0.916, + "args": { + "External id": 2940082,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039888.896, "dur": 25.838, + "args": { + "External id": 2940083,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016039920.135, "dur": 7.711, + "args": { + "External id": 2940084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016039921.719, "dur": 5.446, + "args": { + "External id": 2940085,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016039925.524, "dur": 1.368, + "args": { + "External id": 2940086,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016039930.781, "dur": 44.117, + "args": { + "External id": 2940087,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016039931.973, "dur": 42.287, + "args": { + "External id": 2940088,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016039978.953, "dur": 54.111, + "args": { + "External id": 2940089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016040041.446, "dur": 4.998, + "args": { + "External id": 2940090,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040044.028, "dur": 1.139, + "args": { + "External id": 2940091,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016040052.703, "dur": 54.657, + "args": { + "External id": 2940092,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016040053.773, "dur": 4.619, + "args": { + "External id": 2940093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016040054.787, "dur": 3.070, + "args": { + "External id": 2940094,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040056.817, "dur": 0.838, + "args": { + "External id": 2940095,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016040059.246, "dur": 47.570, + "args": { + "External id": 2940096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016040060.113, "dur": 46.070, + "args": { + "External id": 2940097,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016040113.520, "dur": 5.601, + "args": { + "External id": 2940098,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040115.637, "dur": 2.439, + "args": { + "External id": 2940099,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016040125.769, "dur": 1.562, + "args": { + "External id": 2940100,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040135.244, "dur": 6.063, + "args": { + "External id": 2940101,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040137.233, "dur": 3.794, + "args": { + "External id": 2940102,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016040257.384, "dur": 180.280, + "args": { + "External id": 2940103,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040259.901, "dur": 3.113, + "args": { + "External id": 2940104,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016040264.703, "dur": 172.422, + "args": { + "External id": 2940105,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016040266.225, "dur": 0.503, + "args": { + "External id": 2940106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016040267.969, "dur": 25.027, + "args": { + "External id": 2940107,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016040294.599, "dur": 3.234, + "args": { + "External id": 2940108,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040296.667, "dur": 0.832, + "args": { + "External id": 2940109,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016040298.756, "dur": 24.456, + "args": { + "External id": 2940110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040299.741, "dur": 1.424, + "args": { + "External id": 2940111,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016040302.152, "dur": 20.781, + "args": { + "External id": 2940112,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040308.364, "dur": 3.580, + "args": { + "External id": 2940113,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016040324.741, "dur": 20.875, + "args": { + "External id": 2940114,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040347.067, "dur": 12.628, + "args": { + "External id": 2940115,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016040362.852, "dur": 13.452, + "args": { + "External id": 2940116,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040377.614, "dur": 11.683, + "args": { + "External id": 2940117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016040391.171, "dur": 18.500, + "args": { + "External id": 2940118,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040392.762, "dur": 1.412, + "args": { + "External id": 2940119,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040396.455, "dur": 0.811, + "args": { + "External id": 2940120,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040412.312, "dur": 11.664, + "args": { + "External id": 2940121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040425.061, "dur": 10.969, + "args": { + "External id": 2940122,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016040444.363, "dur": 2.266, + "args": { + "External id": 2940123,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016040455.604, "dur": 3.610, + "args": { + "External id": 2940124,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040457.955, "dur": 0.460, + "args": { + "External id": 2940125,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016040524.267, "dur": 55.840, + "args": { + "External id": 2940126,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016040584.908, "dur": 3.928, + "args": { + "External id": 2940127,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040586.915, "dur": 0.917, + "args": { + "External id": 2940128,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040592.041, "dur": 23.664, + "args": { + "External id": 2940129,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016040620.437, "dur": 4.883, + "args": { + "External id": 2940130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016040621.749, "dur": 3.024, + "args": { + "External id": 2940131,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040623.587, "dur": 0.916, + "args": { + "External id": 2940132,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016040627.945, "dur": 40.873, + "args": { + "External id": 2940133,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016040629.163, "dur": 39.181, + "args": { + "External id": 2940134,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040672.647, "dur": 13.990, + "args": { + "External id": 2940135,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016040694.295, "dur": 3.229, + "args": { + "External id": 2940136,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040695.949, "dur": 0.673, + "args": { + "External id": 2940137,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016040701.275, "dur": 50.156, + "args": { + "External id": 2940138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016040702.255, "dur": 5.016, + "args": { + "External id": 2940139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016040703.095, "dur": 3.598, + "args": { + "External id": 2940140,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040705.971, "dur": 0.594, + "args": { + "External id": 2940141,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016040709.643, "dur": 41.365, + "args": { + "External id": 2940142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016040710.307, "dur": 40.089, + "args": { + "External id": 2940143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016040755.274, "dur": 3.412, + "args": { + "External id": 2940144,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040757.018, "dur": 0.693, + "args": { + "External id": 2940145,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016040763.994, "dur": 1.391, + "args": { + "External id": 2940146,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040772.382, "dur": 5.620, + "args": { + "External id": 2940147,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040774.307, "dur": 3.296, + "args": { + "External id": 2940148,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016040859.259, "dur": 209.618, + "args": { + "External id": 2940149,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040861.532, "dur": 2.097, + "args": { + "External id": 2940150,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016040864.812, "dur": 203.458, + "args": { + "External id": 2940151,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016040867.934, "dur": 0.252, + "args": { + "External id": 2940152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016040869.298, "dur": 18.278, + "args": { + "External id": 2940153,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016040889.053, "dur": 3.162, + "args": { + "External id": 2940154,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016040891.013, "dur": 0.884, + "args": { + "External id": 2940155,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016040892.890, "dur": 22.801, + "args": { + "External id": 2940156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016040895.647, "dur": 1.731, + "args": { + "External id": 2940157,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016040898.335, "dur": 16.996, + "args": { + "External id": 2940158,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040902.325, "dur": 2.126, + "args": { + "External id": 2940159,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016040916.754, "dur": 16.385, + "args": { + "External id": 2940160,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040934.262, "dur": 14.559, + "args": { + "External id": 2940161,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016040951.326, "dur": 12.529, + "args": { + "External id": 2940162,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040964.889, "dur": 11.782, + "args": { + "External id": 2940163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016040978.370, "dur": 61.458, + "args": { + "External id": 2940164,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016040979.640, "dur": 37.683, + "args": { + "External id": 2940165,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041021.230, "dur": 1.006, + "args": { + "External id": 2940166,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041043.102, "dur": 11.556, + "args": { + "External id": 2940167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041055.748, "dur": 11.238, + "args": { + "External id": 2940168,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016041076.036, "dur": 2.099, + "args": { + "External id": 2940169,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016041087.042, "dur": 3.848, + "args": { + "External id": 2940170,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041089.445, "dur": 0.640, + "args": { + "External id": 2940171,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016041172.761, "dur": 60.023, + "args": { + "External id": 2940172,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016041238.422, "dur": 8.032, + "args": { + "External id": 2940173,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041242.578, "dur": 2.451, + "args": { + "External id": 2940174,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041247.932, "dur": 27.021, + "args": { + "External id": 2940175,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016041280.039, "dur": 5.111, + "args": { + "External id": 2940176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016041281.231, "dur": 3.083, + "args": { + "External id": 2940177,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041283.384, "dur": 0.670, + "args": { + "External id": 2940178,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016041287.646, "dur": 74.596, + "args": { + "External id": 2940179,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016041288.720, "dur": 72.878, + "args": { + "External id": 2940180,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041367.846, "dur": 36.292, + "args": { + "External id": 2940181,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016041409.732, "dur": 3.672, + "args": { + "External id": 2940182,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041411.718, "dur": 0.857, + "args": { + "External id": 2940183,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016041417.292, "dur": 63.870, + "args": { + "External id": 2940184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016041418.186, "dur": 5.070, + "args": { + "External id": 2940185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016041418.849, "dur": 3.927, + "args": { + "External id": 2940186,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041422.070, "dur": 0.571, + "args": { + "External id": 2940187,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016041423.779, "dur": 57.005, + "args": { + "External id": 2940188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016041424.587, "dur": 55.528, + "args": { + "External id": 2940189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016041484.488, "dur": 16.433, + "args": { + "External id": 2940190,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041499.247, "dur": 0.691, + "args": { + "External id": 2940191,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016041506.867, "dur": 1.516, + "args": { + "External id": 2940192,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016041517.000, "dur": 8.329, + "args": { + "External id": 2940193,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016041519.036, "dur": 5.964, + "args": { + "External id": 2940194,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016041608.908, "dur": 166.156, + "args": { + "External id": 2940195,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016041610.998, "dur": 2.052, + "args": { + "External id": 2940196,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016041614.160, "dur": 160.508, + "args": { + "External id": 2940197,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016041615.387, "dur": 0.631, + "args": { + "External id": 2940198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016041617.139, "dur": 22.067, + "args": { + "External id": 2940199,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016041642.276, "dur": 2.831, + "args": { + "External id": 2940200,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041644.012, "dur": 0.752, + "args": { + "External id": 2940201,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016041645.933, "dur": 20.701, + "args": { + "External id": 2940202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016041646.634, "dur": 1.441, + "args": { + "External id": 2940203,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016041649.157, "dur": 17.151, + "args": { + "External id": 2940204,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041653.065, "dur": 2.720, + "args": { + "External id": 2940205,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016041667.904, "dur": 19.159, + "args": { + "External id": 2940206,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041688.356, "dur": 11.789, + "args": { + "External id": 2940207,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016041702.603, "dur": 12.847, + "args": { + "External id": 2940208,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041716.607, "dur": 11.520, + "args": { + "External id": 2940209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016041731.481, "dur": 17.624, + "args": { + "External id": 2940210,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041732.988, "dur": 1.302, + "args": { + "External id": 2940211,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041736.207, "dur": 0.653, + "args": { + "External id": 2940212,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041750.407, "dur": 11.153, + "args": { + "External id": 2940213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041762.487, "dur": 10.924, + "args": { + "External id": 2940214,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016041780.630, "dur": 1.580, + "args": { + "External id": 2940215,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016041789.463, "dur": 3.204, + "args": { + "External id": 2940216,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041791.494, "dur": 0.403, + "args": { + "External id": 2940217,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016041854.131, "dur": 48.493, + "args": { + "External id": 2940218,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016041907.111, "dur": 4.080, + "args": { + "External id": 2940219,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041909.217, "dur": 1.056, + "args": { + "External id": 2940220,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016041912.437, "dur": 21.441, + "args": { + "External id": 2940221,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016041937.970, "dur": 5.671, + "args": { + "External id": 2940222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016041939.133, "dur": 3.960, + "args": { + "External id": 2940223,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016041942.205, "dur": 0.697, + "args": { + "External id": 2940224,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016041945.740, "dur": 88.793, + "args": { + "External id": 2940225,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016041946.843, "dur": 86.555, + "args": { + "External id": 2940226,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042040.864, "dur": 16.404, + "args": { + "External id": 2940227,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016042063.474, "dur": 4.827, + "args": { + "External id": 2940228,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042065.959, "dur": 1.042, + "args": { + "External id": 2940229,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016042072.296, "dur": 55.866, + "args": { + "External id": 2940230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016042074.874, "dur": 5.579, + "args": { + "External id": 2940231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016042075.690, "dur": 4.219, + "args": { + "External id": 2940232,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042077.289, "dur": 2.443, + "args": { + "External id": 2940233,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016042081.245, "dur": 46.384, + "args": { + "External id": 2940234,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016042081.949, "dur": 45.098, + "args": { + "External id": 2940235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016042132.073, "dur": 3.102, + "args": { + "External id": 2940236,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042133.713, "dur": 0.538, + "args": { + "External id": 2940237,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016042141.187, "dur": 14.297, + "args": { + "External id": 2940238,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042166.987, "dur": 6.900, + "args": { + "External id": 2940239,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042168.944, "dur": 4.691, + "args": { + "External id": 2940240,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016042259.932, "dur": 175.987, + "args": { + "External id": 2940241,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042262.311, "dur": 2.011, + "args": { + "External id": 2940242,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016042265.421, "dur": 170.137, + "args": { + "External id": 2940243,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016042266.628, "dur": 0.477, + "args": { + "External id": 2940244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016042271.391, "dur": 21.470, + "args": { + "External id": 2940245,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016042298.687, "dur": 3.713, + "args": { + "External id": 2940246,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042301.120, "dur": 1.027, + "args": { + "External id": 2940247,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016042303.142, "dur": 20.794, + "args": { + "External id": 2940248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042303.800, "dur": 2.730, + "args": { + "External id": 2940249,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016042307.600, "dur": 15.985, + "args": { + "External id": 2940250,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042310.263, "dur": 2.404, + "args": { + "External id": 2940251,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016042325.350, "dur": 18.564, + "args": { + "External id": 2940252,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042345.107, "dur": 12.950, + "args": { + "External id": 2940253,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016042360.493, "dur": 12.921, + "args": { + "External id": 2940254,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042376.336, "dur": 11.238, + "args": { + "External id": 2940255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016042389.168, "dur": 18.832, + "args": { + "External id": 2940256,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042390.559, "dur": 1.504, + "args": { + "External id": 2940257,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042394.048, "dur": 1.030, + "args": { + "External id": 2940258,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042409.440, "dur": 11.715, + "args": { + "External id": 2940259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042421.996, "dur": 11.061, + "args": { + "External id": 2940260,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016042442.178, "dur": 1.586, + "args": { + "External id": 2940261,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016042452.659, "dur": 3.692, + "args": { + "External id": 2940262,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042455.078, "dur": 0.411, + "args": { + "External id": 2940263,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016042519.431, "dur": 55.764, + "args": { + "External id": 2940264,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016042580.337, "dur": 4.226, + "args": { + "External id": 2940265,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042582.637, "dur": 0.880, + "args": { + "External id": 2940266,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042585.747, "dur": 22.411, + "args": { + "External id": 2940267,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016042612.397, "dur": 7.702, + "args": { + "External id": 2940268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016042615.568, "dur": 3.948, + "args": { + "External id": 2940269,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042617.255, "dur": 2.003, + "args": { + "External id": 2940270,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016042622.699, "dur": 40.518, + "args": { + "External id": 2940271,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016042623.589, "dur": 38.943, + "args": { + "External id": 2940272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042667.018, "dur": 13.338, + "args": { + "External id": 2940273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016042685.745, "dur": 4.482, + "args": { + "External id": 2940274,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042688.838, "dur": 0.640, + "args": { + "External id": 2940275,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016042694.036, "dur": 46.155, + "args": { + "External id": 2940276,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016042694.757, "dur": 3.254, + "args": { + "External id": 2940277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016042695.349, "dur": 2.076, + "args": { + "External id": 2940278,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042696.621, "dur": 0.681, + "args": { + "External id": 2940279,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016042698.643, "dur": 41.205, + "args": { + "External id": 2940280,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016042699.171, "dur": 40.197, + "args": { + "External id": 2940281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016042745.376, "dur": 3.357, + "args": { + "External id": 2940282,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042747.188, "dur": 0.634, + "args": { + "External id": 2940283,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016042753.826, "dur": 1.118, + "args": { + "External id": 2940284,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042761.386, "dur": 5.715, + "args": { + "External id": 2940285,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042763.153, "dur": 3.668, + "args": { + "External id": 2940286,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016042845.639, "dur": 203.684, + "args": { + "External id": 2940287,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042847.648, "dur": 2.060, + "args": { + "External id": 2940288,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016042852.567, "dur": 196.223, + "args": { + "External id": 2940289,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016042853.510, "dur": 0.360, + "args": { + "External id": 2940290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016042854.981, "dur": 18.802, + "args": { + "External id": 2940291,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016042875.208, "dur": 4.408, + "args": { + "External id": 2940292,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042877.286, "dur": 2.080, + "args": { + "External id": 2940293,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016042880.337, "dur": 20.343, + "args": { + "External id": 2940294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016042881.321, "dur": 1.398, + "args": { + "External id": 2940295,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016042885.247, "dur": 15.109, + "args": { + "External id": 2940296,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042887.199, "dur": 2.147, + "args": { + "External id": 2940297,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016042901.831, "dur": 17.216, + "args": { + "External id": 2940298,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042920.250, "dur": 11.636, + "args": { + "External id": 2940299,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016042934.201, "dur": 12.674, + "args": { + "External id": 2940300,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042947.987, "dur": 10.700, + "args": { + "External id": 2940301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016042960.287, "dur": 20.868, + "args": { + "External id": 2940302,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042961.701, "dur": 1.263, + "args": { + "External id": 2940303,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016042966.555, "dur": 2.132, + "args": { + "External id": 2940304,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016042982.340, "dur": 51.461, + "args": { + "External id": 2940305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043035.877, "dur": 11.173, + "args": { + "External id": 2940306,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016043056.759, "dur": 2.236, + "args": { + "External id": 2940307,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043068.484, "dur": 3.713, + "args": { + "External id": 2940308,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043070.921, "dur": 0.499, + "args": { + "External id": 2940309,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016043135.933, "dur": 72.457, + "args": { + "External id": 2940310,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043214.825, "dur": 7.034, + "args": { + "External id": 2940311,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043219.459, "dur": 1.046, + "args": { + "External id": 2940312,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043223.054, "dur": 25.104, + "args": { + "External id": 2940313,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016043253.028, "dur": 5.516, + "args": { + "External id": 2940314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016043254.351, "dur": 3.582, + "args": { + "External id": 2940315,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043256.100, "dur": 1.603, + "args": { + "External id": 2940316,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016043261.173, "dur": 41.501, + "args": { + "External id": 2940317,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016043262.351, "dur": 39.795, + "args": { + "External id": 2940318,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043308.209, "dur": 14.351, + "args": { + "External id": 2940319,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043327.818, "dur": 3.371, + "args": { + "External id": 2940320,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043329.644, "dur": 0.710, + "args": { + "External id": 2940321,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016043334.669, "dur": 47.815, + "args": { + "External id": 2940322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016043335.638, "dur": 5.258, + "args": { + "External id": 2940323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016043336.270, "dur": 4.139, + "args": { + "External id": 2940324,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043339.530, "dur": 0.770, + "args": { + "External id": 2940325,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016043341.563, "dur": 40.487, + "args": { + "External id": 2940326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016043342.205, "dur": 39.393, + "args": { + "External id": 2940327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043386.303, "dur": 3.186, + "args": { + "External id": 2940328,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043388.019, "dur": 0.552, + "args": { + "External id": 2940329,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016043395.357, "dur": 1.513, + "args": { + "External id": 2940330,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016043404.216, "dur": 8.418, + "args": { + "External id": 2940331,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016043408.082, "dur": 4.259, + "args": { + "External id": 2940332,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016043493.732, "dur": 167.499, + "args": { + "External id": 2940333,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016043497.680, "dur": 2.298, + "args": { + "External id": 2940334,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016043501.201, "dur": 159.576, + "args": { + "External id": 2940335,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016043502.269, "dur": 0.399, + "args": { + "External id": 2940336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016043503.801, "dur": 19.081, + "args": { + "External id": 2940337,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016043524.118, "dur": 5.009, + "args": { + "External id": 2940338,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043528.002, "dur": 0.752, + "args": { + "External id": 2940339,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016043529.806, "dur": 21.100, + "args": { + "External id": 2940340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016043530.821, "dur": 1.262, + "args": { + "External id": 2940341,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016043534.862, "dur": 15.783, + "args": { + "External id": 2940342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043537.342, "dur": 2.543, + "args": { + "External id": 2940343,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016043552.135, "dur": 19.099, + "args": { + "External id": 2940344,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043572.662, "dur": 12.351, + "args": { + "External id": 2940345,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016043587.462, "dur": 12.783, + "args": { + "External id": 2940346,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043601.388, "dur": 11.862, + "args": { + "External id": 2940347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016043614.690, "dur": 20.654, + "args": { + "External id": 2940348,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043618.102, "dur": 1.375, + "args": { + "External id": 2940349,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043622.627, "dur": 0.715, + "args": { + "External id": 2940350,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043636.660, "dur": 11.180, + "args": { + "External id": 2940351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043648.753, "dur": 11.133, + "args": { + "External id": 2940352,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016043667.199, "dur": 1.250, + "args": { + "External id": 2940353,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043676.548, "dur": 3.112, + "args": { + "External id": 2940354,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043678.419, "dur": 0.425, + "args": { + "External id": 2940355,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016043737.068, "dur": 47.031, + "args": { + "External id": 2940356,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043788.311, "dur": 5.722, + "args": { + "External id": 2940357,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043792.132, "dur": 0.840, + "args": { + "External id": 2940358,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043795.067, "dur": 20.488, + "args": { + "External id": 2940359,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016043819.314, "dur": 4.721, + "args": { + "External id": 2940360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016043820.548, "dur": 2.949, + "args": { + "External id": 2940361,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043822.116, "dur": 1.173, + "args": { + "External id": 2940362,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016043826.119, "dur": 40.975, + "args": { + "External id": 2940363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016043828.674, "dur": 37.759, + "args": { + "External id": 2940364,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016043870.528, "dur": 13.152, + "args": { + "External id": 2940365,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043888.665, "dur": 3.657, + "args": { + "External id": 2940366,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043890.713, "dur": 0.815, + "args": { + "External id": 2940367,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336755, "tid": 1336755, + "ts": 1555016043895.184, "dur": 45.447, + "args": { + "External id": 2940368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016043895.897, "dur": 4.384, + "args": { + "External id": 2940369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016043896.507, "dur": 3.324, + "args": { + "External id": 2940370,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043899.197, "dur": 0.521, + "args": { + "External id": 2940371,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016043900.782, "dur": 39.446, + "args": { + "External id": 2940372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016043901.479, "dur": 38.224, + "args": { + "External id": 2940373,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016043944.123, "dur": 3.357, + "args": { + "External id": 2940374,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016043945.819, "dur": 0.558, + "args": { + "External id": 2940375,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016043951.946, "dur": 1.169, + "args": { + "External id": 2940376,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016043960.502, "dur": 5.229, + "args": { + "External id": 2940377,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016043962.143, "dur": 3.279, + "args": { + "External id": 2940378,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016044084.492, "dur": 192.874, + "args": { + "External id": 2940379,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016044088.608, "dur": 3.165, + "args": { + "External id": 2940380,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336755, "tid": 1336755, + "ts": 1555016044093.320, "dur": 183.624, + "args": { + "External id": 2940381,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336755, "tid": 1336755, + "ts": 1555016044094.524, "dur": 0.283, + "args": { + "External id": 2940382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336755, "tid": 1336755, + "ts": 1555016044097.554, "dur": 20.776, + "args": { + "External id": 2940383,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336755, "tid": 1336755, + "ts": 1555016044119.811, "dur": 4.420, + "args": { + "External id": 2940384,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044122.966, "dur": 0.848, + "args": { + "External id": 2940385,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016044124.933, "dur": 35.702, + "args": { + "External id": 2940386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555016044125.898, "dur": 1.478, + "args": { + "External id": 2940387,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555016044128.659, "dur": 31.391, + "args": { + "External id": 2940388,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044131.128, "dur": 2.534, + "args": { + "External id": 2940389,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555016044162.436, "dur": 22.720, + "args": { + "External id": 2940390,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044186.771, "dur": 12.242, + "args": { + "External id": 2940391,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336755, "tid": 1336755, + "ts": 1555016044202.018, "dur": 13.130, + "args": { + "External id": 2940392,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044218.267, "dur": 11.370, + "args": { + "External id": 2940393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016044231.216, "dur": 19.565, + "args": { + "External id": 2940394,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044232.996, "dur": 1.901, + "args": { + "External id": 2940395,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044236.917, "dur": 0.651, + "args": { + "External id": 2940396,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044252.163, "dur": 11.371, + "args": { + "External id": 2940397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044264.424, "dur": 11.495, + "args": { + "External id": 2940398,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555016044286.514, "dur": 2.109, + "args": { + "External id": 2940399,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016044298.067, "dur": 3.521, + "args": { + "External id": 2940400,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044300.332, "dur": 0.451, + "args": { + "External id": 2940401,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016044365.576, "dur": 59.506, + "args": { + "External id": 2940402,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336755, "tid": 1336755, + "ts": 1555016044429.900, "dur": 4.227, + "args": { + "External id": 2940403,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044432.301, "dur": 0.717, + "args": { + "External id": 2940404,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044435.472, "dur": 23.988, + "args": { + "External id": 2940405,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336755, "tid": 1336755, + "ts": 1555016044464.042, "dur": 6.702, + "args": { + "External id": 2940406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336755, "tid": 1336755, + "ts": 1555016044465.489, "dur": 4.609, + "args": { + "External id": 2940407,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044468.808, "dur": 1.069, + "args": { + "External id": 2940408,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336755, "tid": 1336755, + "ts": 1555016044473.238, "dur": 42.665, + "args": { + "External id": 2940409,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336755, "tid": 1336755, + "ts": 1555016044474.438, "dur": 40.799, + "args": { + "External id": 2940410,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044519.763, "dur": 13.490, + "args": { + "External id": 2940411,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016044537.693, "dur": 23.266, + "args": { + "External id": 2940412,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336755, "tid": 1336755, + "ts": 1555016044539.786, "dur": 20.684, + "args": { + "External id": 2940413,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044546.248, "dur": 0.433, + "args": { + "External id": 2940414,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555016044565.638, "dur": 66.754, + "args": { + "External id": 2940415,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336755, "tid": 1336755, + "ts": 1555016044567.133, "dur": 64.787, + "args": { + "External id": 2940416,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016044609.310, "dur": 4.797, + "args": { + "External id": 2940417,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044615.552, "dur": 15.715, + "args": { + "External id": 2940418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016044643.822, "dur": 4.436, + "args": { + "External id": 2940419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016044645.512, "dur": 2.522, + "args": { + "External id": 2940420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 10395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016044649.269, "dur": 0.782, + "args": { + "External id": 2940421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336755, "tid": 1336755, + "ts": 1555016044649.556, "dur": 0.426, + "args": { + "External id": 2940422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044689.712, "dur": 21.605, + "args": { + "External id": 2940423,"Sequence number": 29294788, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336755, "tid": 1336755, + "ts": 1555016044713.090, "dur": 13.038, + "args": { + "External id": 2940424,"Sequence number": 29294789, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10399 + } + }, + { + "ph": "s", "id": 2, "pid": 1336755, "tid": 1336755, "ts": 1555016044713.090, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 1336755, "tid": 1336755, + "ts": 1555016044827.365, "dur": 40.066, + "args": { + "External id": 2940425,"Record function id": 0, "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336755, "tid": 1336755, + "ts": 1555016045026.836, "dur": 40.527, + "args": { + "External id": 2940426,"Sequence number": 29294790, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 10401 + } + }, + { + "ph": "s", "id": 1, "pid": 1336755, "tid": 1336755, "ts": 1555016045026.836, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016045104.770, "dur": 27.253, + "args": { + "External id": 2940427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555016045106.281, "dur": 8.746, + "args": { + "External id": 2940428,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555016045109.715, "dur": 4.604, + "args": { + "External id": 2940429,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555016045116.830, "dur": 14.904, + "args": { + "External id": 2940430,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336755, "tid": 1336755, + "ts": 1555017382383.756, "dur": 58.372, + "args": { + "External id": 2940431,"Sequence number": 29294791, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336755, "tid": 1336755, + "ts": 1555017382452.438, "dur": 23.273, + "args": { + "External id": 2940432,"Sequence number": 29294792, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555017382483.702, "dur": 22.631, + "args": { + "External id": 2940433,"Sequence number": 29294793, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555017383091.927, "dur": 30.129, + "args": { + "External id": 2940434,"Sequence number": 29294794, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555017383696.720, "dur": 31.747, + "args": { + "External id": 2940435,"Sequence number": 29294795, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 1336755, "tid": 1336755, + "ts": 1555017385384.018, "dur": 2965.530, + "args": { + "External id": 2940436,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 1336755, "tid": 1336755, + "ts": 1555017386017.688, "dur": 952.553, + "args": { + "External id": 2940437,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336755, "tid": 1336755, + "ts": 1555017386039.914, "dur": 64.311, + "args": { + "External id": 2940438,"Record function id": 0, "Concrete Inputs": ["[36375]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555017386043.455, "dur": 12.282, + "args": { + "External id": 2940439,"Record function id": 0, "Concrete Inputs": ["[36375]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336755, "tid": 1336755, + "ts": 1555017386058.364, "dur": 45.529, + "args": { + "External id": 2940440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[36375]], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336755, "tid": 1336755, + "ts": 1555017386060.735, "dur": 42.385, + "args": { + "External id": 2940441,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[36375], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388383.538, "dur": 3.395, + "args": { + "External id": 2940442,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388388.871, "dur": 0.344, + "args": { + "External id": 2940443,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388390.598, "dur": 0.370, + "args": { + "External id": 2940444,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388391.990, "dur": 0.356, + "args": { + "External id": 2940445,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388393.452, "dur": 0.442, + "args": { + "External id": 2940446,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388395.084, "dur": 0.228, + "args": { + "External id": 2940447,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388396.118, "dur": 0.221, + "args": { + "External id": 2940448,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388399.070, "dur": 0.218, + "args": { + "External id": 2940449,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388400.092, "dur": 0.211, + "args": { + "External id": 2940450,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388401.338, "dur": 0.213, + "args": { + "External id": 2940451,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388402.575, "dur": 0.333, + "args": { + "External id": 2940452,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388403.732, "dur": 0.319, + "args": { + "External id": 2940453,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388404.876, "dur": 0.325, + "args": { + "External id": 2940454,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388406.106, "dur": 0.333, + "args": { + "External id": 2940455,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388407.270, "dur": 0.376, + "args": { + "External id": 2940456,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388410.287, "dur": 0.216, + "args": { + "External id": 2940457,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388411.603, "dur": 0.233, + "args": { + "External id": 2940458,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388412.852, "dur": 0.339, + "args": { + "External id": 2940459,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388414.139, "dur": 0.226, + "args": { + "External id": 2940460,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388415.256, "dur": 0.383, + "args": { + "External id": 2940461,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388416.399, "dur": 0.370, + "args": { + "External id": 2940462,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388417.529, "dur": 0.361, + "args": { + "External id": 2940463,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388418.647, "dur": 0.388, + "args": { + "External id": 2940464,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388421.523, "dur": 0.223, + "args": { + "External id": 2940465,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388422.570, "dur": 0.258, + "args": { + "External id": 2940466,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388423.938, "dur": 0.225, + "args": { + "External id": 2940467,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388424.911, "dur": 0.354, + "args": { + "External id": 2940468,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388426.077, "dur": 0.334, + "args": { + "External id": 2940469,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388427.268, "dur": 0.341, + "args": { + "External id": 2940470,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388428.649, "dur": 0.214, + "args": { + "External id": 2940471,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388429.617, "dur": 0.210, + "args": { + "External id": 2940472,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388432.268, "dur": 0.209, + "args": { + "External id": 2940473,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388433.574, "dur": 0.231, + "args": { + "External id": 2940474,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388434.876, "dur": 0.232, + "args": { + "External id": 2940475,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388436.077, "dur": 0.223, + "args": { + "External id": 2940476,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388437.028, "dur": 0.233, + "args": { + "External id": 2940477,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388438.283, "dur": 0.264, + "args": { + "External id": 2940478,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388439.281, "dur": 0.214, + "args": { + "External id": 2940479,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388440.239, "dur": 0.209, + "args": { + "External id": 2940480,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388442.910, "dur": 0.213, + "args": { + "External id": 2940481,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388443.863, "dur": 0.205, + "args": { + "External id": 2940482,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388445.149, "dur": 0.212, + "args": { + "External id": 2940483,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388446.088, "dur": 0.238, + "args": { + "External id": 2940484,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388447.072, "dur": 0.202, + "args": { + "External id": 2940485,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388448.208, "dur": 0.217, + "args": { + "External id": 2940486,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388450.399, "dur": 0.243, + "args": { + "External id": 2940487,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388451.580, "dur": 0.210, + "args": { + "External id": 2940488,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388454.039, "dur": 0.205, + "args": { + "External id": 2940489,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388455.120, "dur": 0.209, + "args": { + "External id": 2940490,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388456.180, "dur": 0.209, + "args": { + "External id": 2940491,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388457.212, "dur": 0.202, + "args": { + "External id": 2940492,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388458.237, "dur": 0.209, + "args": { + "External id": 2940493,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388459.303, "dur": 0.225, + "args": { + "External id": 2940494,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388460.612, "dur": 0.228, + "args": { + "External id": 2940495,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388461.726, "dur": 0.334, + "args": { + "External id": 2940496,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388464.400, "dur": 0.215, + "args": { + "External id": 2940497,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388465.578, "dur": 0.208, + "args": { + "External id": 2940498,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388466.600, "dur": 0.344, + "args": { + "External id": 2940499,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388467.772, "dur": 0.309, + "args": { + "External id": 2940500,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388468.867, "dur": 0.307, + "args": { + "External id": 2940501,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388470.160, "dur": 0.345, + "args": { + "External id": 2940502,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388471.270, "dur": 0.335, + "args": { + "External id": 2940503,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388472.343, "dur": 0.342, + "args": { + "External id": 2940504,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388474.964, "dur": 0.222, + "args": { + "External id": 2940505,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388475.934, "dur": 0.226, + "args": { + "External id": 2940506,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388477.083, "dur": 0.233, + "args": { + "External id": 2940507,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388478.043, "dur": 0.213, + "args": { + "External id": 2940508,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388478.975, "dur": 0.205, + "args": { + "External id": 2940509,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388479.939, "dur": 0.210, + "args": { + "External id": 2940510,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388480.889, "dur": 0.207, + "args": { + "External id": 2940511,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388481.819, "dur": 0.215, + "args": { + "External id": 2940512,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388484.572, "dur": 0.210, + "args": { + "External id": 2940513,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388485.607, "dur": 0.209, + "args": { + "External id": 2940514,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388486.732, "dur": 0.207, + "args": { + "External id": 2940515,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388487.920, "dur": 0.219, + "args": { + "External id": 2940516,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388488.852, "dur": 0.205, + "args": { + "External id": 2940517,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388489.900, "dur": 0.242, + "args": { + "External id": 2940518,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388491.920, "dur": 0.374, + "args": { + "External id": 2940519,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388493.134, "dur": 0.398, + "args": { + "External id": 2940520,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388495.574, "dur": 0.206, + "args": { + "External id": 2940521,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388496.556, "dur": 0.369, + "args": { + "External id": 2940522,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388497.727, "dur": 0.204, + "args": { + "External id": 2940523,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388498.765, "dur": 0.219, + "args": { + "External id": 2940524,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388499.725, "dur": 0.206, + "args": { + "External id": 2940525,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388500.735, "dur": 0.223, + "args": { + "External id": 2940526,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388501.705, "dur": 0.206, + "args": { + "External id": 2940527,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388502.764, "dur": 0.221, + "args": { + "External id": 2940528,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388505.474, "dur": 0.204, + "args": { + "External id": 2940529,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388506.418, "dur": 0.238, + "args": { + "External id": 2940530,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388507.376, "dur": 0.205, + "args": { + "External id": 2940531,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388508.420, "dur": 0.229, + "args": { + "External id": 2940532,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388509.382, "dur": 0.207, + "args": { + "External id": 2940533,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388510.525, "dur": 0.372, + "args": { + "External id": 2940534,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388512.566, "dur": 0.357, + "args": { + "External id": 2940535,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388513.828, "dur": 0.306, + "args": { + "External id": 2940536,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388516.470, "dur": 0.207, + "args": { + "External id": 2940537,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388517.438, "dur": 0.333, + "args": { + "External id": 2940538,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388518.583, "dur": 0.324, + "args": { + "External id": 2940539,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388519.843, "dur": 0.210, + "args": { + "External id": 2940540,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388520.877, "dur": 0.338, + "args": { + "External id": 2940541,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388522.033, "dur": 0.354, + "args": { + "External id": 2940542,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388523.231, "dur": 0.222, + "args": { + "External id": 2940543,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388524.220, "dur": 0.236, + "args": { + "External id": 2940544,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388526.677, "dur": 0.225, + "args": { + "External id": 2940545,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388527.624, "dur": 0.210, + "args": { + "External id": 2940546,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388528.682, "dur": 0.267, + "args": { + "External id": 2940547,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388529.758, "dur": 0.218, + "args": { + "External id": 2940548,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388530.825, "dur": 0.204, + "args": { + "External id": 2940549,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388531.940, "dur": 0.229, + "args": { + "External id": 2940550,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388533.307, "dur": 0.208, + "args": { + "External id": 2940551,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388534.369, "dur": 0.228, + "args": { + "External id": 2940552,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388537.085, "dur": 0.211, + "args": { + "External id": 2940553,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388538.061, "dur": 0.208, + "args": { + "External id": 2940554,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388538.984, "dur": 0.213, + "args": { + "External id": 2940555,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388539.989, "dur": 0.213, + "args": { + "External id": 2940556,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388540.960, "dur": 0.239, + "args": { + "External id": 2940557,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388541.981, "dur": 0.248, + "args": { + "External id": 2940558,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388543.001, "dur": 0.206, + "args": { + "External id": 2940559,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388544.028, "dur": 0.207, + "args": { + "External id": 2940560,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388546.717, "dur": 0.205, + "args": { + "External id": 2940561,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388547.749, "dur": 0.210, + "args": { + "External id": 2940562,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388548.895, "dur": 0.241, + "args": { + "External id": 2940563,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388549.894, "dur": 0.214, + "args": { + "External id": 2940564,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388550.829, "dur": 0.206, + "args": { + "External id": 2940565,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388551.738, "dur": 0.211, + "args": { + "External id": 2940566,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388552.755, "dur": 0.234, + "args": { + "External id": 2940567,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388553.695, "dur": 0.211, + "args": { + "External id": 2940568,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388556.194, "dur": 0.206, + "args": { + "External id": 2940569,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388557.117, "dur": 0.211, + "args": { + "External id": 2940570,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388558.419, "dur": 0.203, + "args": { + "External id": 2940571,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388559.411, "dur": 0.209, + "args": { + "External id": 2940572,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388560.359, "dur": 0.224, + "args": { + "External id": 2940573,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388561.295, "dur": 0.230, + "args": { + "External id": 2940574,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388562.221, "dur": 0.209, + "args": { + "External id": 2940575,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388563.167, "dur": 0.206, + "args": { + "External id": 2940576,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388565.461, "dur": 0.209, + "args": { + "External id": 2940577,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388566.583, "dur": 0.246, + "args": { + "External id": 2940578,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388567.662, "dur": 0.204, + "args": { + "External id": 2940579,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388568.821, "dur": 0.204, + "args": { + "External id": 2940580,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388569.912, "dur": 0.228, + "args": { + "External id": 2940581,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388571.034, "dur": 0.306, + "args": { + "External id": 2940582,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388572.135, "dur": 0.332, + "args": { + "External id": 2940583,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388573.327, "dur": 0.338, + "args": { + "External id": 2940584,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388576.627, "dur": 0.206, + "args": { + "External id": 2940585,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388577.541, "dur": 0.317, + "args": { + "External id": 2940586,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388578.593, "dur": 0.320, + "args": { + "External id": 2940587,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388579.779, "dur": 0.202, + "args": { + "External id": 2940588,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388580.703, "dur": 0.323, + "args": { + "External id": 2940589,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388581.789, "dur": 0.347, + "args": { + "External id": 2940590,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388583.425, "dur": 0.365, + "args": { + "External id": 2940591,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388584.593, "dur": 0.406, + "args": { + "External id": 2940592,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388587.662, "dur": 0.212, + "args": { + "External id": 2940593,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388588.641, "dur": 0.204, + "args": { + "External id": 2940594,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388589.650, "dur": 0.216, + "args": { + "External id": 2940595,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388590.711, "dur": 0.218, + "args": { + "External id": 2940596,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388591.775, "dur": 0.200, + "args": { + "External id": 2940597,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388592.689, "dur": 0.210, + "args": { + "External id": 2940598,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388593.614, "dur": 0.209, + "args": { + "External id": 2940599,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388594.671, "dur": 0.214, + "args": { + "External id": 2940600,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388598.305, "dur": 0.241, + "args": { + "External id": 2940601,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388599.315, "dur": 0.217, + "args": { + "External id": 2940602,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388600.479, "dur": 0.210, + "args": { + "External id": 2940603,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388601.427, "dur": 0.215, + "args": { + "External id": 2940604,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388602.372, "dur": 0.207, + "args": { + "External id": 2940605,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388603.361, "dur": 0.200, + "args": { + "External id": 2940606,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388604.412, "dur": 0.355, + "args": { + "External id": 2940607,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388605.570, "dur": 0.361, + "args": { + "External id": 2940608,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388608.425, "dur": 0.235, + "args": { + "External id": 2940609,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388609.399, "dur": 0.317, + "args": { + "External id": 2940610,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388610.462, "dur": 0.314, + "args": { + "External id": 2940611,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388611.503, "dur": 0.215, + "args": { + "External id": 2940612,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388612.520, "dur": 0.236, + "args": { + "External id": 2940613,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388613.461, "dur": 0.323, + "args": { + "External id": 2940614,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388614.783, "dur": 0.308, + "args": { + "External id": 2940615,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388615.888, "dur": 0.215, + "args": { + "External id": 2940616,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388618.251, "dur": 0.205, + "args": { + "External id": 2940617,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388619.190, "dur": 0.205, + "args": { + "External id": 2940618,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388620.099, "dur": 0.209, + "args": { + "External id": 2940619,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388621.162, "dur": 0.238, + "args": { + "External id": 2940620,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388622.177, "dur": 0.214, + "args": { + "External id": 2940621,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388623.211, "dur": 0.222, + "args": { + "External id": 2940622,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388624.287, "dur": 0.217, + "args": { + "External id": 2940623,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388625.479, "dur": 0.214, + "args": { + "External id": 2940624,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388627.936, "dur": 0.210, + "args": { + "External id": 2940625,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388628.886, "dur": 0.198, + "args": { + "External id": 2940626,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388629.903, "dur": 0.208, + "args": { + "External id": 2940627,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388631.009, "dur": 0.238, + "args": { + "External id": 2940628,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388632.063, "dur": 0.207, + "args": { + "External id": 2940629,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388633.154, "dur": 0.225, + "args": { + "External id": 2940630,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388634.187, "dur": 0.210, + "args": { + "External id": 2940631,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388635.211, "dur": 0.230, + "args": { + "External id": 2940632,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388637.615, "dur": 0.209, + "args": { + "External id": 2940633,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388638.649, "dur": 0.214, + "args": { + "External id": 2940634,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388639.695, "dur": 0.236, + "args": { + "External id": 2940635,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388640.651, "dur": 0.214, + "args": { + "External id": 2940636,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388641.580, "dur": 0.209, + "args": { + "External id": 2940637,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388642.528, "dur": 0.214, + "args": { + "External id": 2940638,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388643.555, "dur": 0.227, + "args": { + "External id": 2940639,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388644.498, "dur": 0.371, + "args": { + "External id": 2940640,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388646.999, "dur": 0.227, + "args": { + "External id": 2940641,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388647.962, "dur": 0.213, + "args": { + "External id": 2940642,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388649.269, "dur": 0.204, + "args": { + "External id": 2940643,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388650.189, "dur": 0.208, + "args": { + "External id": 2940644,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388651.197, "dur": 0.204, + "args": { + "External id": 2940645,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388652.156, "dur": 0.200, + "args": { + "External id": 2940646,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388653.096, "dur": 0.210, + "args": { + "External id": 2940647,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388654.020, "dur": 0.209, + "args": { + "External id": 2940648,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388657.087, "dur": 0.229, + "args": { + "External id": 2940649,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388658.039, "dur": 0.205, + "args": { + "External id": 2940650,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388658.959, "dur": 0.211, + "args": { + "External id": 2940651,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388659.964, "dur": 0.242, + "args": { + "External id": 2940652,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388660.941, "dur": 0.202, + "args": { + "External id": 2940653,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388661.885, "dur": 0.362, + "args": { + "External id": 2940654,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388662.968, "dur": 0.340, + "args": { + "External id": 2940655,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388664.083, "dur": 0.325, + "args": { + "External id": 2940656,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388666.615, "dur": 0.209, + "args": { + "External id": 2940657,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388667.561, "dur": 0.361, + "args": { + "External id": 2940658,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388668.657, "dur": 0.331, + "args": { + "External id": 2940659,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388669.701, "dur": 0.309, + "args": { + "External id": 2940660,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388670.764, "dur": 0.364, + "args": { + "External id": 2940661,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388671.832, "dur": 0.339, + "args": { + "External id": 2940662,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388672.884, "dur": 0.209, + "args": { + "External id": 2940663,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388673.833, "dur": 0.213, + "args": { + "External id": 2940664,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388676.154, "dur": 0.234, + "args": { + "External id": 2940665,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388677.208, "dur": 0.238, + "args": { + "External id": 2940666,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388678.295, "dur": 0.209, + "args": { + "External id": 2940667,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388679.210, "dur": 0.214, + "args": { + "External id": 2940668,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388680.246, "dur": 0.209, + "args": { + "External id": 2940669,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388681.280, "dur": 0.228, + "args": { + "External id": 2940670,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388682.377, "dur": 0.207, + "args": { + "External id": 2940671,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388683.440, "dur": 0.226, + "args": { + "External id": 2940672,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388686.175, "dur": 0.209, + "args": { + "External id": 2940673,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388687.100, "dur": 0.242, + "args": { + "External id": 2940674,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388688.540, "dur": 0.204, + "args": { + "External id": 2940675,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388689.928, "dur": 0.221, + "args": { + "External id": 2940676,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388690.876, "dur": 0.210, + "args": { + "External id": 2940677,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388691.900, "dur": 0.213, + "args": { + "External id": 2940678,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388692.839, "dur": 0.209, + "args": { + "External id": 2940679,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388693.806, "dur": 0.334, + "args": { + "External id": 2940680,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388696.347, "dur": 0.208, + "args": { + "External id": 2940681,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388697.293, "dur": 0.348, + "args": { + "External id": 2940682,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388698.557, "dur": 0.321, + "args": { + "External id": 2940683,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388699.731, "dur": 0.218, + "args": { + "External id": 2940684,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388700.675, "dur": 0.347, + "args": { + "External id": 2940685,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388701.735, "dur": 0.329, + "args": { + "External id": 2940686,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388702.800, "dur": 0.323, + "args": { + "External id": 2940687,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388703.866, "dur": 0.341, + "args": { + "External id": 2940688,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388706.199, "dur": 0.208, + "args": { + "External id": 2940689,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388707.181, "dur": 0.214, + "args": { + "External id": 2940690,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388708.110, "dur": 0.209, + "args": { + "External id": 2940691,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388709.045, "dur": 0.214, + "args": { + "External id": 2940692,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388709.976, "dur": 0.209, + "args": { + "External id": 2940693,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388710.915, "dur": 0.216, + "args": { + "External id": 2940694,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388711.842, "dur": 0.211, + "args": { + "External id": 2940695,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388712.819, "dur": 0.212, + "args": { + "External id": 2940696,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388715.185, "dur": 0.214, + "args": { + "External id": 2940697,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388719.385, "dur": 0.213, + "args": { + "External id": 2940698,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388720.474, "dur": 0.223, + "args": { + "External id": 2940699,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388721.441, "dur": 0.210, + "args": { + "External id": 2940700,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388722.371, "dur": 0.250, + "args": { + "External id": 2940701,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388723.357, "dur": 0.200, + "args": { + "External id": 2940702,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388724.317, "dur": 0.209, + "args": { + "External id": 2940703,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388725.250, "dur": 0.212, + "args": { + "External id": 2940704,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388727.638, "dur": 0.235, + "args": { + "External id": 2940705,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388728.646, "dur": 0.207, + "args": { + "External id": 2940706,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388729.605, "dur": 0.212, + "args": { + "External id": 2940707,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388730.547, "dur": 0.204, + "args": { + "External id": 2940708,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388731.582, "dur": 0.203, + "args": { + "External id": 2940709,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388732.836, "dur": 0.208, + "args": { + "External id": 2940710,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388734.224, "dur": 0.205, + "args": { + "External id": 2940711,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388735.157, "dur": 0.214, + "args": { + "External id": 2940712,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388737.854, "dur": 0.213, + "args": { + "External id": 2940713,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388738.968, "dur": 0.206, + "args": { + "External id": 2940714,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388741.192, "dur": 0.220, + "args": { + "External id": 2940715,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388742.206, "dur": 0.199, + "args": { + "External id": 2940716,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388743.230, "dur": 0.209, + "args": { + "External id": 2940717,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388744.519, "dur": 0.208, + "args": { + "External id": 2940718,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388745.446, "dur": 0.198, + "args": { + "External id": 2940719,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388746.465, "dur": 0.208, + "args": { + "External id": 2940720,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388748.777, "dur": 0.209, + "args": { + "External id": 2940721,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388749.757, "dur": 0.215, + "args": { + "External id": 2940722,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388750.863, "dur": 0.217, + "args": { + "External id": 2940723,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388751.858, "dur": 0.207, + "args": { + "External id": 2940724,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388752.778, "dur": 0.300, + "args": { + "External id": 2940725,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388753.864, "dur": 0.359, + "args": { + "External id": 2940726,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388754.952, "dur": 0.333, + "args": { + "External id": 2940727,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388756.161, "dur": 0.391, + "args": { + "External id": 2940728,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388758.835, "dur": 0.229, + "args": { + "External id": 2940729,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388759.856, "dur": 0.310, + "args": { + "External id": 2940730,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388760.921, "dur": 0.209, + "args": { + "External id": 2940731,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017388761.870, "dur": 0.210, + "args": { + "External id": 2940732,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 1336755, "tid": 1336755, + "ts": 1555017388817.650, "dur": 1532.789, + "args": { + "External id": 2940733,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 1336755, "tid": 1336755, + "ts": 1555017389288.032, "dur": 987.650, + "args": { + "External id": 2940734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389296.953, "dur": 7.214, + "args": { + "External id": 2940735,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389300.730, "dur": 2.924, + "args": { + "External id": 2940736,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389304.879, "dur": 2.757, + "args": { + "External id": 2940737,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389305.674, "dur": 1.830, + "args": { + "External id": 2940738,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389308.030, "dur": 3.160, + "args": { + "External id": 2940739,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389310.349, "dur": 0.710, + "args": { + "External id": 2940740,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389311.650, "dur": 1.482, + "args": { + "External id": 2940741,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389312.206, "dur": 0.857, + "args": { + "External id": 2940742,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389314.746, "dur": 3.717, + "args": { + "External id": 2940743,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389317.888, "dur": 0.510, + "args": { + "External id": 2940744,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389318.787, "dur": 1.091, + "args": { + "External id": 2940745,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389319.205, "dur": 0.602, + "args": { + "External id": 2940746,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389320.238, "dur": 1.454, + "args": { + "External id": 2940747,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389320.794, "dur": 0.833, + "args": { + "External id": 2940748,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389322.031, "dur": 2.679, + "args": { + "External id": 2940749,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389324.088, "dur": 0.558, + "args": { + "External id": 2940750,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389325.099, "dur": 1.375, + "args": { + "External id": 2940751,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389325.801, "dur": 0.608, + "args": { + "External id": 2940752,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389326.787, "dur": 2.315, + "args": { + "External id": 2940753,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389327.325, "dur": 1.532, + "args": { + "External id": 2940754,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389329.451, "dur": 2.388, + "args": { + "External id": 2940755,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389331.251, "dur": 0.519, + "args": { + "External id": 2940756,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389332.198, "dur": 1.078, + "args": { + "External id": 2940757,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389332.568, "dur": 0.643, + "args": { + "External id": 2940758,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389335.095, "dur": 3.863, + "args": { + "External id": 2940759,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389338.293, "dur": 0.600, + "args": { + "External id": 2940760,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389339.250, "dur": 1.062, + "args": { + "External id": 2940761,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389339.647, "dur": 0.596, + "args": { + "External id": 2940762,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389340.563, "dur": 1.371, + "args": { + "External id": 2940763,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389341.077, "dur": 0.792, + "args": { + "External id": 2940764,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389342.301, "dur": 2.654, + "args": { + "External id": 2940765,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389344.303, "dur": 0.583, + "args": { + "External id": 2940766,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389345.265, "dur": 1.202, + "args": { + "External id": 2940767,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389345.914, "dur": 0.482, + "args": { + "External id": 2940768,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389346.893, "dur": 2.217, + "args": { + "External id": 2940769,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389347.412, "dur": 1.629, + "args": { + "External id": 2940770,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389349.433, "dur": 2.526, + "args": { + "External id": 2940771,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389351.148, "dur": 0.744, + "args": { + "External id": 2940772,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389352.188, "dur": 1.501, + "args": { + "External id": 2940773,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389352.836, "dur": 0.789, + "args": { + "External id": 2940774,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389355.704, "dur": 3.930, + "args": { + "External id": 2940775,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389359.007, "dur": 0.562, + "args": { + "External id": 2940776,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389359.975, "dur": 1.071, + "args": { + "External id": 2940777,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389360.460, "dur": 0.523, + "args": { + "External id": 2940778,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389361.438, "dur": 1.412, + "args": { + "External id": 2940779,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389362.107, "dur": 0.678, + "args": { + "External id": 2940780,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389363.224, "dur": 3.811, + "args": { + "External id": 2940781,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389366.362, "dur": 0.611, + "args": { + "External id": 2940782,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389367.298, "dur": 1.115, + "args": { + "External id": 2940783,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389367.664, "dur": 0.685, + "args": { + "External id": 2940784,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389368.773, "dur": 2.496, + "args": { + "External id": 2940785,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389369.257, "dur": 1.939, + "args": { + "External id": 2940786,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389371.626, "dur": 2.108, + "args": { + "External id": 2940787,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389373.146, "dur": 0.523, + "args": { + "External id": 2940788,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389374.403, "dur": 1.375, + "args": { + "External id": 2940789,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389374.952, "dur": 0.637, + "args": { + "External id": 2940790,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389377.374, "dur": 4.013, + "args": { + "External id": 2940791,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389380.785, "dur": 0.527, + "args": { + "External id": 2940792,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389381.636, "dur": 1.274, + "args": { + "External id": 2940793,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389382.096, "dur": 0.750, + "args": { + "External id": 2940794,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389383.183, "dur": 1.233, + "args": { + "External id": 2940795,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389383.684, "dur": 0.668, + "args": { + "External id": 2940796,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389384.879, "dur": 3.250, + "args": { + "External id": 2940797,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389387.538, "dur": 0.526, + "args": { + "External id": 2940798,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389388.465, "dur": 1.278, + "args": { + "External id": 2940799,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389389.143, "dur": 0.529, + "args": { + "External id": 2940800,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389390.259, "dur": 2.724, + "args": { + "External id": 2940801,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389390.919, "dur": 1.859, + "args": { + "External id": 2940802,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389393.338, "dur": 3.342, + "args": { + "External id": 2940803,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389395.994, "dur": 0.620, + "args": { + "External id": 2940804,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389397.117, "dur": 2.292, + "args": { + "External id": 2940805,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389397.560, "dur": 1.775, + "args": { + "External id": 2940806,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389400.951, "dur": 2.442, + "args": { + "External id": 2940807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389402.810, "dur": 0.515, + "args": { + "External id": 2940808,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389403.618, "dur": 1.220, + "args": { + "External id": 2940809,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389404.100, "dur": 0.669, + "args": { + "External id": 2940810,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389405.301, "dur": 2.682, + "args": { + "External id": 2940811,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389407.371, "dur": 0.543, + "args": { + "External id": 2940812,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389408.342, "dur": 2.463, + "args": { + "External id": 2940813,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389410.161, "dur": 0.577, + "args": { + "External id": 2940814,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389411.086, "dur": 1.362, + "args": { + "External id": 2940815,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389411.606, "dur": 0.777, + "args": { + "External id": 2940816,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389412.677, "dur": 2.573, + "args": { + "External id": 2940817,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389414.413, "dur": 0.770, + "args": { + "External id": 2940818,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389415.480, "dur": 2.196, + "args": { + "External id": 2940819,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389416.977, "dur": 0.636, + "args": { + "External id": 2940820,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389417.902, "dur": 2.627, + "args": { + "External id": 2940821,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389418.431, "dur": 2.019, + "args": { + "External id": 2940822,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389422.322, "dur": 2.176, + "args": { + "External id": 2940823,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389423.638, "dur": 0.795, + "args": { + "External id": 2940824,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389424.727, "dur": 1.569, + "args": { + "External id": 2940825,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389425.491, "dur": 0.736, + "args": { + "External id": 2940826,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389426.624, "dur": 2.563, + "args": { + "External id": 2940827,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389428.553, "dur": 0.562, + "args": { + "External id": 2940828,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389429.537, "dur": 2.415, + "args": { + "External id": 2940829,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389431.185, "dur": 0.698, + "args": { + "External id": 2940830,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389432.327, "dur": 2.020, + "args": { + "External id": 2940831,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389433.433, "dur": 0.846, + "args": { + "External id": 2940832,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389434.685, "dur": 2.966, + "args": { + "External id": 2940833,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389437.062, "dur": 0.523, + "args": { + "External id": 2940834,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389437.921, "dur": 1.771, + "args": { + "External id": 2940835,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389438.964, "dur": 0.657, + "args": { + "External id": 2940836,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389440.125, "dur": 2.984, + "args": { + "External id": 2940837,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389441.189, "dur": 1.704, + "args": { + "External id": 2940838,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389444.897, "dur": 1.792, + "args": { + "External id": 2940839,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389445.938, "dur": 0.685, + "args": { + "External id": 2940840,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389447.178, "dur": 1.558, + "args": { + "External id": 2940841,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389447.940, "dur": 0.733, + "args": { + "External id": 2940842,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389449.055, "dur": 3.270, + "args": { + "External id": 2940843,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389451.731, "dur": 0.530, + "args": { + "External id": 2940844,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389452.573, "dur": 1.779, + "args": { + "External id": 2940845,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389453.688, "dur": 0.596, + "args": { + "External id": 2940846,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389454.604, "dur": 1.580, + "args": { + "External id": 2940847,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389455.414, "dur": 0.704, + "args": { + "External id": 2940848,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389456.533, "dur": 3.538, + "args": { + "External id": 2940849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389459.335, "dur": 0.672, + "args": { + "External id": 2940850,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389460.471, "dur": 1.633, + "args": { + "External id": 2940851,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389461.294, "dur": 0.747, + "args": { + "External id": 2940852,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389462.435, "dur": 2.987, + "args": { + "External id": 2940853,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389463.607, "dur": 1.743, + "args": { + "External id": 2940854,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389467.233, "dur": 1.451, + "args": { + "External id": 2940855,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389468.113, "dur": 0.502, + "args": { + "External id": 2940856,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389468.910, "dur": 1.710, + "args": { + "External id": 2940857,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389469.834, "dur": 0.714, + "args": { + "External id": 2940858,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389470.988, "dur": 3.360, + "args": { + "External id": 2940859,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389473.753, "dur": 0.526, + "args": { + "External id": 2940860,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389474.917, "dur": 1.471, + "args": { + "External id": 2940861,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389475.777, "dur": 0.542, + "args": { + "External id": 2940862,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389476.748, "dur": 1.716, + "args": { + "External id": 2940863,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389477.732, "dur": 0.664, + "args": { + "External id": 2940864,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389478.802, "dur": 3.216, + "args": { + "External id": 2940865,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389481.393, "dur": 0.560, + "args": { + "External id": 2940866,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389482.482, "dur": 1.316, + "args": { + "External id": 2940867,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389483.064, "dur": 0.665, + "args": { + "External id": 2940868,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389484.031, "dur": 2.771, + "args": { + "External id": 2940869,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389485.135, "dur": 1.595, + "args": { + "External id": 2940870,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389488.366, "dur": 1.785, + "args": { + "External id": 2940871,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389489.255, "dur": 0.830, + "args": { + "External id": 2940872,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389490.648, "dur": 1.970, + "args": { + "External id": 2940873,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389491.682, "dur": 0.867, + "args": { + "External id": 2940874,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389492.964, "dur": 2.922, + "args": { + "External id": 2940875,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389495.432, "dur": 0.387, + "args": { + "External id": 2940876,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389496.132, "dur": 1.844, + "args": { + "External id": 2940877,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389497.259, "dur": 0.651, + "args": { + "External id": 2940878,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389498.368, "dur": 1.601, + "args": { + "External id": 2940879,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389499.178, "dur": 0.722, + "args": { + "External id": 2940880,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389500.311, "dur": 3.494, + "args": { + "External id": 2940881,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389502.860, "dur": 0.878, + "args": { + "External id": 2940882,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389504.199, "dur": 1.335, + "args": { + "External id": 2940883,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389504.938, "dur": 0.532, + "args": { + "External id": 2940884,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389506.002, "dur": 2.697, + "args": { + "External id": 2940885,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389506.996, "dur": 1.629, + "args": { + "External id": 2940886,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389510.655, "dur": 1.355, + "args": { + "External id": 2940887,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389511.406, "dur": 0.538, + "args": { + "External id": 2940888,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389512.416, "dur": 1.658, + "args": { + "External id": 2940889,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389513.362, "dur": 0.644, + "args": { + "External id": 2940890,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389514.326, "dur": 3.172, + "args": { + "External id": 2940891,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389516.706, "dur": 0.729, + "args": { + "External id": 2940892,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389517.847, "dur": 1.972, + "args": { + "External id": 2940893,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389518.998, "dur": 0.757, + "args": { + "External id": 2940894,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389520.291, "dur": 1.940, + "args": { + "External id": 2940895,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389521.250, "dur": 0.914, + "args": { + "External id": 2940896,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389522.577, "dur": 2.880, + "args": { + "External id": 2940897,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389524.823, "dur": 0.569, + "args": { + "External id": 2940898,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389525.717, "dur": 1.633, + "args": { + "External id": 2940899,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389526.639, "dur": 0.645, + "args": { + "External id": 2940900,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389527.584, "dur": 3.120, + "args": { + "External id": 2940901,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389528.645, "dur": 1.985, + "args": { + "External id": 2940902,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389532.184, "dur": 1.447, + "args": { + "External id": 2940903,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389532.959, "dur": 0.605, + "args": { + "External id": 2940904,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389534.046, "dur": 1.581, + "args": { + "External id": 2940905,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389534.932, "dur": 0.628, + "args": { + "External id": 2940906,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389535.878, "dur": 3.359, + "args": { + "External id": 2940907,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389538.656, "dur": 0.518, + "args": { + "External id": 2940908,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389539.657, "dur": 1.731, + "args": { + "External id": 2940909,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389540.775, "dur": 0.545, + "args": { + "External id": 2940910,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389541.772, "dur": 1.836, + "args": { + "External id": 2940911,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389542.799, "dur": 0.738, + "args": { + "External id": 2940912,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389544.083, "dur": 3.578, + "args": { + "External id": 2940913,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389546.854, "dur": 0.742, + "args": { + "External id": 2940914,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389548.032, "dur": 1.656, + "args": { + "External id": 2940915,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389548.969, "dur": 0.654, + "args": { + "External id": 2940916,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389550.035, "dur": 2.717, + "args": { + "External id": 2940917,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389551.039, "dur": 1.517, + "args": { + "External id": 2940918,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389554.213, "dur": 1.757, + "args": { + "External id": 2940919,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389555.096, "dur": 0.807, + "args": { + "External id": 2940920,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389556.209, "dur": 2.026, + "args": { + "External id": 2940921,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389557.181, "dur": 0.986, + "args": { + "External id": 2940922,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389558.710, "dur": 3.150, + "args": { + "External id": 2940923,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389561.288, "dur": 0.500, + "args": { + "External id": 2940924,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389562.238, "dur": 1.637, + "args": { + "External id": 2940925,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389563.189, "dur": 0.620, + "args": { + "External id": 2940926,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389564.242, "dur": 1.901, + "args": { + "External id": 2940927,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389565.309, "dur": 0.765, + "args": { + "External id": 2940928,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389587.125, "dur": 3.287, + "args": { + "External id": 2947073,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389589.792, "dur": 0.553, + "args": { + "External id": 2947074,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389590.698, "dur": 1.294, + "args": { + "External id": 2947075,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389591.348, "dur": 0.582, + "args": { + "External id": 2947076,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389592.222, "dur": 2.799, + "args": { + "External id": 2947077,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389592.999, "dur": 1.949, + "args": { + "External id": 2947078,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389596.784, "dur": 1.442, + "args": { + "External id": 2947079,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389597.451, "dur": 0.708, + "args": { + "External id": 2947080,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389598.686, "dur": 1.378, + "args": { + "External id": 2947081,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389599.495, "dur": 0.502, + "args": { + "External id": 2947082,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389600.441, "dur": 2.720, + "args": { + "External id": 2947083,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389602.537, "dur": 0.560, + "args": { + "External id": 2947084,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389603.408, "dur": 1.384, + "args": { + "External id": 2947085,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389604.127, "dur": 0.600, + "args": { + "External id": 2947086,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389605.106, "dur": 1.195, + "args": { + "External id": 2947087,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389605.686, "dur": 0.546, + "args": { + "External id": 2947088,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389606.620, "dur": 2.921, + "args": { + "External id": 2947089,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389608.814, "dur": 0.660, + "args": { + "External id": 2947090,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389609.799, "dur": 1.305, + "args": { + "External id": 2947091,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389610.439, "dur": 0.596, + "args": { + "External id": 2947092,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389611.453, "dur": 2.888, + "args": { + "External id": 2947093,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389612.462, "dur": 1.806, + "args": { + "External id": 2947094,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389616.021, "dur": 1.278, + "args": { + "External id": 2947095,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389616.689, "dur": 0.545, + "args": { + "External id": 2947096,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389617.847, "dur": 1.637, + "args": { + "External id": 2947097,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389618.793, "dur": 0.619, + "args": { + "External id": 2947098,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389619.740, "dur": 3.199, + "args": { + "External id": 2947099,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389622.338, "dur": 0.530, + "args": { + "External id": 2947100,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389623.215, "dur": 1.745, + "args": { + "External id": 2947101,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389624.358, "dur": 0.535, + "args": { + "External id": 2947102,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389625.351, "dur": 1.294, + "args": { + "External id": 2947103,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389626.017, "dur": 0.557, + "args": { + "External id": 2947104,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389627.003, "dur": 3.072, + "args": { + "External id": 2947105,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389629.404, "dur": 0.597, + "args": { + "External id": 2947106,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389630.454, "dur": 1.250, + "args": { + "External id": 2947107,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389631.105, "dur": 0.533, + "args": { + "External id": 2947108,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389631.934, "dur": 2.703, + "args": { + "External id": 2947109,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389632.718, "dur": 1.847, + "args": { + "External id": 2947110,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389636.627, "dur": 1.217, + "args": { + "External id": 2947111,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389637.200, "dur": 0.578, + "args": { + "External id": 2947112,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389638.075, "dur": 2.094, + "args": { + "External id": 2947113,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389639.219, "dur": 0.886, + "args": { + "External id": 2947114,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389640.481, "dur": 2.443, + "args": { + "External id": 2947115,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389642.354, "dur": 0.504, + "args": { + "External id": 2947116,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389643.170, "dur": 1.533, + "args": { + "External id": 2947117,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389644.117, "dur": 0.521, + "args": { + "External id": 2947118,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389644.981, "dur": 1.586, + "args": { + "External id": 2947119,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389645.851, "dur": 0.646, + "args": { + "External id": 2947120,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389646.925, "dur": 3.606, + "args": { + "External id": 2947121,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389649.775, "dur": 0.692, + "args": { + "External id": 2947122,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389650.787, "dur": 1.282, + "args": { + "External id": 2947123,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389651.476, "dur": 0.527, + "args": { + "External id": 2947124,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389652.307, "dur": 3.209, + "args": { + "External id": 2947125,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389653.325, "dur": 2.000, + "args": { + "External id": 2947126,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389657.257, "dur": 1.521, + "args": { + "External id": 2947127,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389658.043, "dur": 0.671, + "args": { + "External id": 2947128,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389659.138, "dur": 1.934, + "args": { + "External id": 2947129,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389660.185, "dur": 0.820, + "args": { + "External id": 2947130,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389661.433, "dur": 2.839, + "args": { + "External id": 2947131,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389663.687, "dur": 0.520, + "args": { + "External id": 2947132,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389664.518, "dur": 1.861, + "args": { + "External id": 2947133,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389665.707, "dur": 0.602, + "args": { + "External id": 2947134,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389666.635, "dur": 1.371, + "args": { + "External id": 2947135,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389667.327, "dur": 0.615, + "args": { + "External id": 2947136,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389668.261, "dur": 2.670, + "args": { + "External id": 2947137,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389670.313, "dur": 0.556, + "args": { + "External id": 2947138,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389671.333, "dur": 1.241, + "args": { + "External id": 2947139,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389671.982, "dur": 0.530, + "args": { + "External id": 2947140,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389672.852, "dur": 2.609, + "args": { + "External id": 2947141,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389673.800, "dur": 1.589, + "args": { + "External id": 2947142,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389677.276, "dur": 1.306, + "args": { + "External id": 2947143,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389678.020, "dur": 0.498, + "args": { + "External id": 2947144,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389678.815, "dur": 1.874, + "args": { + "External id": 2947145,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389679.847, "dur": 0.774, + "args": { + "External id": 2947146,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389681.081, "dur": 2.845, + "args": { + "External id": 2947147,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389683.357, "dur": 0.505, + "args": { + "External id": 2947148,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389684.194, "dur": 1.527, + "args": { + "External id": 2947149,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389685.109, "dur": 0.548, + "args": { + "External id": 2947150,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389686.105, "dur": 1.507, + "args": { + "External id": 2947151,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389686.895, "dur": 0.650, + "args": { + "External id": 2947152,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389687.965, "dur": 3.122, + "args": { + "External id": 2947153,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389690.474, "dur": 0.551, + "args": { + "External id": 2947154,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389691.528, "dur": 1.238, + "args": { + "External id": 2947155,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389692.181, "dur": 0.520, + "args": { + "External id": 2947156,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389693.001, "dur": 2.801, + "args": { + "External id": 2947157,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389693.723, "dur": 2.004, + "args": { + "External id": 2947158,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389697.385, "dur": 1.335, + "args": { + "External id": 2947159,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389698.118, "dur": 0.540, + "args": { + "External id": 2947160,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389698.948, "dur": 1.459, + "args": { + "External id": 2947161,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389699.703, "dur": 0.637, + "args": { + "External id": 2947162,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389700.842, "dur": 3.051, + "args": { + "External id": 2947163,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389703.313, "dur": 0.518, + "args": { + "External id": 2947164,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389704.138, "dur": 1.646, + "args": { + "External id": 2947165,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389705.178, "dur": 0.542, + "args": { + "External id": 2947166,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389706.044, "dur": 1.535, + "args": { + "External id": 2947167,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389706.887, "dur": 0.625, + "args": { + "External id": 2947168,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389707.809, "dur": 3.107, + "args": { + "External id": 2947169,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389710.220, "dur": 0.629, + "args": { + "External id": 2947170,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389711.399, "dur": 1.318, + "args": { + "External id": 2947171,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389712.100, "dur": 0.552, + "args": { + "External id": 2947172,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389713.101, "dur": 2.807, + "args": { + "External id": 2947173,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389714.030, "dur": 1.808, + "args": { + "External id": 2947174,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389717.663, "dur": 1.343, + "args": { + "External id": 2947175,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389718.426, "dur": 0.519, + "args": { + "External id": 2947176,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389719.617, "dur": 1.827, + "args": { + "External id": 2947177,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389720.603, "dur": 0.779, + "args": { + "External id": 2947178,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389721.700, "dur": 2.647, + "args": { + "External id": 2947179,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389723.692, "dur": 0.594, + "args": { + "External id": 2947180,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389724.624, "dur": 1.509, + "args": { + "External id": 2947181,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389725.471, "dur": 0.593, + "args": { + "External id": 2947182,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389726.391, "dur": 1.542, + "args": { + "External id": 2947183,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389727.145, "dur": 0.726, + "args": { + "External id": 2947184,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389728.167, "dur": 2.874, + "args": { + "External id": 2947185,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389730.466, "dur": 0.513, + "args": { + "External id": 2947186,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389731.514, "dur": 1.391, + "args": { + "External id": 2947187,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389732.162, "dur": 0.670, + "args": { + "External id": 2947188,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389733.224, "dur": 3.356, + "args": { + "External id": 2947189,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389734.453, "dur": 2.056, + "args": { + "External id": 2947190,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389739.016, "dur": 1.411, + "args": { + "External id": 2947191,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389739.819, "dur": 0.546, + "args": { + "External id": 2947192,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389740.745, "dur": 1.590, + "args": { + "External id": 2947193,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389741.668, "dur": 0.599, + "args": { + "External id": 2947194,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389742.591, "dur": 3.376, + "args": { + "External id": 2947195,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389745.384, "dur": 0.509, + "args": { + "External id": 2947196,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389746.215, "dur": 1.716, + "args": { + "External id": 2947197,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389747.334, "dur": 0.530, + "args": { + "External id": 2947198,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389748.191, "dur": 1.241, + "args": { + "External id": 2947199,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389748.741, "dur": 0.623, + "args": { + "External id": 2947200,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389749.794, "dur": 2.932, + "args": { + "External id": 2947201,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389752.151, "dur": 0.511, + "args": { + "External id": 2947202,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389753.013, "dur": 2.842, + "args": { + "External id": 2947203,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389755.194, "dur": 0.592, + "args": { + "External id": 2947204,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389756.188, "dur": 2.689, + "args": { + "External id": 2947205,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389756.992, "dur": 1.811, + "args": { + "External id": 2947206,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389760.558, "dur": 1.346, + "args": { + "External id": 2947207,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389761.337, "dur": 0.504, + "args": { + "External id": 2947208,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389762.136, "dur": 1.641, + "args": { + "External id": 2947209,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389763.079, "dur": 0.631, + "args": { + "External id": 2947210,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389764.046, "dur": 2.860, + "args": { + "External id": 2947211,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389766.339, "dur": 0.505, + "args": { + "External id": 2947212,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389767.154, "dur": 1.426, + "args": { + "External id": 2947213,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389768.024, "dur": 0.494, + "args": { + "External id": 2947214,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389768.835, "dur": 1.160, + "args": { + "External id": 2947215,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389769.378, "dur": 0.549, + "args": { + "External id": 2947216,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389770.346, "dur": 3.634, + "args": { + "External id": 2947217,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389773.314, "dur": 0.602, + "args": { + "External id": 2947218,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389774.233, "dur": 1.344, + "args": { + "External id": 2947219,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389774.964, "dur": 0.549, + "args": { + "External id": 2947220,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389775.808, "dur": 3.265, + "args": { + "External id": 2947221,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389777.032, "dur": 1.776, + "args": { + "External id": 2947222,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389780.602, "dur": 1.113, + "args": { + "External id": 2947223,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389781.160, "dur": 0.486, + "args": { + "External id": 2947224,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389781.947, "dur": 1.555, + "args": { + "External id": 2947225,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389782.770, "dur": 0.664, + "args": { + "External id": 2947226,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389783.861, "dur": 2.896, + "args": { + "External id": 2947227,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389786.157, "dur": 0.530, + "args": { + "External id": 2947228,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389787.006, "dur": 1.798, + "args": { + "External id": 2947229,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389788.222, "dur": 0.516, + "args": { + "External id": 2947230,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389789.156, "dur": 1.354, + "args": { + "External id": 2947231,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389789.827, "dur": 0.617, + "args": { + "External id": 2947232,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389790.833, "dur": 3.037, + "args": { + "External id": 2947233,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389793.052, "dur": 0.745, + "args": { + "External id": 2947234,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389794.323, "dur": 1.456, + "args": { + "External id": 2947235,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389794.977, "dur": 0.732, + "args": { + "External id": 2947236,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389796.008, "dur": 3.148, + "args": { + "External id": 2947237,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389796.956, "dur": 1.994, + "args": { + "External id": 2947238,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389800.728, "dur": 1.328, + "args": { + "External id": 2947239,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389801.366, "dur": 0.624, + "args": { + "External id": 2947240,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389802.301, "dur": 2.271, + "args": { + "External id": 2947241,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389803.562, "dur": 0.785, + "args": { + "External id": 2947242,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389804.834, "dur": 2.809, + "args": { + "External id": 2947243,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389806.932, "dur": 0.647, + "args": { + "External id": 2947244,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389807.894, "dur": 1.550, + "args": { + "External id": 2947245,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389808.888, "dur": 0.492, + "args": { + "External id": 2947246,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389809.808, "dur": 1.423, + "args": { + "External id": 2947247,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389810.553, "dur": 0.611, + "args": { + "External id": 2947248,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389811.464, "dur": 2.977, + "args": { + "External id": 2947249,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389813.871, "dur": 0.503, + "args": { + "External id": 2947250,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389814.735, "dur": 1.475, + "args": { + "External id": 2947251,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389815.515, "dur": 0.632, + "args": { + "External id": 2947252,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389816.453, "dur": 2.947, + "args": { + "External id": 2947253,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389817.493, "dur": 1.720, + "args": { + "External id": 2947254,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389821.463, "dur": 1.252, + "args": { + "External id": 2947255,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389822.149, "dur": 0.495, + "args": { + "External id": 2947256,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389822.947, "dur": 2.043, + "args": { + "External id": 2947257,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389824.215, "dur": 0.707, + "args": { + "External id": 2947258,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389825.245, "dur": 2.876, + "args": { + "External id": 2947259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389827.568, "dur": 0.484, + "args": { + "External id": 2947260,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389828.369, "dur": 1.804, + "args": { + "External id": 2947261,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389829.590, "dur": 0.517, + "args": { + "External id": 2947262,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389830.583, "dur": 1.395, + "args": { + "External id": 2947263,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389831.294, "dur": 0.616, + "args": { + "External id": 2947264,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389832.334, "dur": 3.125, + "args": { + "External id": 2947265,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389834.897, "dur": 0.498, + "args": { + "External id": 2947266,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389835.844, "dur": 1.271, + "args": { + "External id": 2947267,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389836.546, "dur": 0.504, + "args": { + "External id": 2947268,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389837.466, "dur": 2.764, + "args": { + "External id": 2947269,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389838.412, "dur": 1.534, + "args": { + "External id": 2947270,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389842.207, "dur": 1.212, + "args": { + "External id": 2947271,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389842.889, "dur": 0.462, + "args": { + "External id": 2947272,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389843.777, "dur": 1.807, + "args": { + "External id": 2947273,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389844.878, "dur": 0.640, + "args": { + "External id": 2947274,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389845.839, "dur": 3.132, + "args": { + "External id": 2947275,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389848.263, "dur": 0.636, + "args": { + "External id": 2947276,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389849.217, "dur": 1.753, + "args": { + "External id": 2947277,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389850.143, "dur": 0.761, + "args": { + "External id": 2947278,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389851.223, "dur": 1.243, + "args": { + "External id": 2947279,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389851.769, "dur": 0.630, + "args": { + "External id": 2947280,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389852.825, "dur": 3.407, + "args": { + "External id": 2947281,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389855.604, "dur": 0.557, + "args": { + "External id": 2947282,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389856.585, "dur": 1.345, + "args": { + "External id": 2947283,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389857.334, "dur": 0.524, + "args": { + "External id": 2947284,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389858.361, "dur": 2.518, + "args": { + "External id": 2947285,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389859.427, "dur": 1.377, + "args": { + "External id": 2947286,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389862.396, "dur": 1.268, + "args": { + "External id": 2947287,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389863.087, "dur": 0.507, + "args": { + "External id": 2947288,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389863.917, "dur": 1.754, + "args": { + "External id": 2947289,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389864.831, "dur": 0.777, + "args": { + "External id": 2947290,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389866.145, "dur": 2.452, + "args": { + "External id": 2947291,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389868.025, "dur": 0.499, + "args": { + "External id": 2947292,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389868.869, "dur": 1.601, + "args": { + "External id": 2947293,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389869.879, "dur": 0.530, + "args": { + "External id": 2947294,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389870.725, "dur": 1.300, + "args": { + "External id": 2947295,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389871.363, "dur": 0.594, + "args": { + "External id": 2947296,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389872.255, "dur": 2.632, + "args": { + "External id": 2947297,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389874.281, "dur": 0.535, + "args": { + "External id": 2947298,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389875.232, "dur": 1.693, + "args": { + "External id": 2947299,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389876.090, "dur": 0.771, + "args": { + "External id": 2947300,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389877.156, "dur": 2.749, + "args": { + "External id": 2947301,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389878.195, "dur": 1.495, + "args": { + "External id": 2947302,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389881.484, "dur": 1.265, + "args": { + "External id": 2947303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389882.188, "dur": 0.497, + "args": { + "External id": 2947304,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389883.189, "dur": 1.746, + "args": { + "External id": 2947305,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389884.069, "dur": 0.799, + "args": { + "External id": 2947306,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389885.189, "dur": 3.103, + "args": { + "External id": 2947307,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389887.411, "dur": 0.817, + "args": { + "External id": 2947308,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389888.541, "dur": 1.668, + "args": { + "External id": 2947309,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389889.636, "dur": 0.505, + "args": { + "External id": 2947310,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389890.465, "dur": 1.330, + "args": { + "External id": 2947311,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389891.116, "dur": 0.609, + "args": { + "External id": 2947312,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389892.024, "dur": 2.778, + "args": { + "External id": 2947313,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389894.216, "dur": 0.517, + "args": { + "External id": 2947314,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389895.210, "dur": 1.386, + "args": { + "External id": 2947315,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389895.851, "dur": 0.684, + "args": { + "External id": 2947316,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389896.828, "dur": 2.865, + "args": { + "External id": 2947317,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389897.830, "dur": 1.793, + "args": { + "External id": 2947318,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389901.141, "dur": 1.408, + "args": { + "External id": 2947319,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389901.890, "dur": 0.587, + "args": { + "External id": 2947320,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389902.778, "dur": 1.488, + "args": { + "External id": 2947321,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389903.579, "dur": 0.620, + "args": { + "External id": 2947322,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389904.522, "dur": 3.477, + "args": { + "External id": 2947323,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389907.055, "dur": 0.874, + "args": { + "External id": 2947324,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389908.248, "dur": 1.553, + "args": { + "External id": 2947325,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389909.119, "dur": 0.620, + "args": { + "External id": 2947326,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389910.056, "dur": 1.438, + "args": { + "External id": 2947327,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389910.756, "dur": 0.666, + "args": { + "External id": 2947328,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389911.725, "dur": 3.058, + "args": { + "External id": 2947329,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389914.161, "dur": 0.554, + "args": { + "External id": 2947330,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389915.143, "dur": 1.386, + "args": { + "External id": 2947331,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389915.845, "dur": 0.620, + "args": { + "External id": 2947332,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389916.781, "dur": 2.642, + "args": { + "External id": 2947333,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389917.825, "dur": 1.418, + "args": { + "External id": 2947334,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389921.203, "dur": 1.174, + "args": { + "External id": 2947335,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389921.767, "dur": 0.548, + "args": { + "External id": 2947336,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389922.607, "dur": 1.885, + "args": { + "External id": 2947337,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389923.663, "dur": 0.762, + "args": { + "External id": 2947338,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389924.748, "dur": 2.955, + "args": { + "External id": 2947339,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389927.027, "dur": 0.603, + "args": { + "External id": 2947340,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389927.973, "dur": 2.167, + "args": { + "External id": 2947341,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389929.256, "dur": 0.817, + "args": { + "External id": 2947342,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389930.393, "dur": 1.387, + "args": { + "External id": 2947343,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389931.096, "dur": 0.617, + "args": { + "External id": 2947344,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389932.010, "dur": 3.186, + "args": { + "External id": 2947345,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389934.598, "dur": 0.529, + "args": { + "External id": 2947346,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389935.461, "dur": 1.299, + "args": { + "External id": 2947347,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389936.095, "dur": 0.598, + "args": { + "External id": 2947348,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389936.989, "dur": 2.691, + "args": { + "External id": 2947349,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389937.940, "dur": 1.666, + "args": { + "External id": 2947350,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389941.366, "dur": 1.122, + "args": { + "External id": 2947351,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389941.908, "dur": 0.513, + "args": { + "External id": 2947352,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389942.733, "dur": 1.407, + "args": { + "External id": 2947353,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389943.469, "dur": 0.604, + "args": { + "External id": 2947354,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389944.396, "dur": 2.593, + "args": { + "External id": 2947355,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389946.434, "dur": 0.489, + "args": { + "External id": 2947356,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389947.233, "dur": 1.757, + "args": { + "External id": 2947357,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389948.434, "dur": 0.485, + "args": { + "External id": 2947358,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389949.349, "dur": 1.398, + "args": { + "External id": 2947359,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389950.069, "dur": 0.603, + "args": { + "External id": 2947360,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389951.127, "dur": 2.680, + "args": { + "External id": 2947361,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389953.207, "dur": 0.537, + "args": { + "External id": 2947362,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389954.059, "dur": 1.175, + "args": { + "External id": 2947363,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389954.612, "dur": 0.551, + "args": { + "External id": 2947364,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389955.467, "dur": 3.005, + "args": { + "External id": 2947365,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389956.563, "dur": 1.709, + "args": { + "External id": 2947366,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389959.999, "dur": 1.417, + "args": { + "External id": 2947367,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389960.740, "dur": 0.613, + "args": { + "External id": 2947368,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389961.645, "dur": 1.551, + "args": { + "External id": 2947369,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389962.526, "dur": 0.602, + "args": { + "External id": 2947370,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389963.564, "dur": 3.144, + "args": { + "External id": 2947371,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389965.956, "dur": 0.685, + "args": { + "External id": 2947372,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389966.962, "dur": 1.764, + "args": { + "External id": 2947373,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389968.120, "dur": 0.535, + "args": { + "External id": 2947374,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389968.978, "dur": 1.475, + "args": { + "External id": 2947375,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389969.755, "dur": 0.629, + "args": { + "External id": 2947376,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389970.693, "dur": 2.890, + "args": { + "External id": 2947377,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389972.975, "dur": 0.543, + "args": { + "External id": 2947378,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389973.971, "dur": 1.322, + "args": { + "External id": 2947379,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389974.621, "dur": 0.605, + "args": { + "External id": 2947380,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389975.525, "dur": 2.472, + "args": { + "External id": 2947381,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389976.283, "dur": 1.640, + "args": { + "External id": 2947382,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389979.840, "dur": 1.284, + "args": { + "External id": 2947383,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389980.507, "dur": 0.553, + "args": { + "External id": 2947384,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017389981.361, "dur": 28.570, + "args": { + "External id": 2947385,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017389982.369, "dur": 26.660, + "args": { + "External id": 2947386,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390011.445, "dur": 3.702, + "args": { + "External id": 2947387,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390014.495, "dur": 0.586, + "args": { + "External id": 2947388,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390015.435, "dur": 1.693, + "args": { + "External id": 2947389,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390016.618, "dur": 0.444, + "args": { + "External id": 2947390,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390017.419, "dur": 1.422, + "args": { + "External id": 2947391,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390018.268, "dur": 0.489, + "args": { + "External id": 2947392,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390019.115, "dur": 3.029, + "args": { + "External id": 2947393,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390021.568, "dur": 0.498, + "args": { + "External id": 2947394,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390022.391, "dur": 1.296, + "args": { + "External id": 2947395,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390023.077, "dur": 0.543, + "args": { + "External id": 2947396,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390023.916, "dur": 2.855, + "args": { + "External id": 2947397,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390024.920, "dur": 1.786, + "args": { + "External id": 2947398,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390028.788, "dur": 1.516, + "args": { + "External id": 2947399,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390029.642, "dur": 0.597, + "args": { + "External id": 2947400,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390030.687, "dur": 1.707, + "args": { + "External id": 2947401,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390031.622, "dur": 0.709, + "args": { + "External id": 2947402,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390032.960, "dur": 3.086, + "args": { + "External id": 2947403,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390035.327, "dur": 0.651, + "args": { + "External id": 2947404,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390036.294, "dur": 1.768, + "args": { + "External id": 2947405,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390037.291, "dur": 0.705, + "args": { + "External id": 2947406,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390038.326, "dur": 1.368, + "args": { + "External id": 2947407,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390039.042, "dur": 0.589, + "args": { + "External id": 2947408,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390039.921, "dur": 2.674, + "args": { + "External id": 2947409,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390042.013, "dur": 0.519, + "args": { + "External id": 2947410,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390042.950, "dur": 1.285, + "args": { + "External id": 2947411,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390043.653, "dur": 0.520, + "args": { + "External id": 2947412,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390044.585, "dur": 3.029, + "args": { + "External id": 2947413,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390045.540, "dur": 1.875, + "args": { + "External id": 2947414,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390049.593, "dur": 1.386, + "args": { + "External id": 2947415,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390050.238, "dur": 0.668, + "args": { + "External id": 2947416,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390051.298, "dur": 1.712, + "args": { + "External id": 2947417,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390052.273, "dur": 0.671, + "args": { + "External id": 2947418,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390053.253, "dur": 3.026, + "args": { + "External id": 2947419,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390055.596, "dur": 0.616, + "args": { + "External id": 2947420,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390056.524, "dur": 1.369, + "args": { + "External id": 2947421,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390057.387, "dur": 0.445, + "args": { + "External id": 2947422,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390058.272, "dur": 1.453, + "args": { + "External id": 2947423,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390058.994, "dur": 0.665, + "args": { + "External id": 2947424,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390060.137, "dur": 2.892, + "args": { + "External id": 2947425,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390062.415, "dur": 0.549, + "args": { + "External id": 2947426,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390063.360, "dur": 1.373, + "args": { + "External id": 2947427,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390064.038, "dur": 0.621, + "args": { + "External id": 2947428,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390064.959, "dur": 2.534, + "args": { + "External id": 2947429,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390065.680, "dur": 1.738, + "args": { + "External id": 2947430,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390069.192, "dur": 1.443, + "args": { + "External id": 2947431,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390069.825, "dur": 0.734, + "args": { + "External id": 2947432,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390070.945, "dur": 1.866, + "args": { + "External id": 2947433,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390072.019, "dur": 0.727, + "args": { + "External id": 2947434,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390073.052, "dur": 2.913, + "args": { + "External id": 2947435,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390075.234, "dur": 0.664, + "args": { + "External id": 2947436,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390076.218, "dur": 1.670, + "args": { + "External id": 2947437,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390077.311, "dur": 0.506, + "args": { + "External id": 2947438,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390078.130, "dur": 1.214, + "args": { + "External id": 2947439,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390078.683, "dur": 0.599, + "args": { + "External id": 2947440,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390079.673, "dur": 3.002, + "args": { + "External id": 2947441,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390081.936, "dur": 0.672, + "args": { + "External id": 2947442,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390082.918, "dur": 1.496, + "args": { + "External id": 2947443,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390083.686, "dur": 0.665, + "args": { + "External id": 2947444,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390084.660, "dur": 2.253, + "args": { + "External id": 2947445,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390085.495, "dur": 1.344, + "args": { + "External id": 2947446,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390088.526, "dur": 1.218, + "args": { + "External id": 2947447,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390089.082, "dur": 0.594, + "args": { + "External id": 2947448,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390090.075, "dur": 1.637, + "args": { + "External id": 2947449,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390090.991, "dur": 0.657, + "args": { + "External id": 2947450,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390091.966, "dur": 2.578, + "args": { + "External id": 2947451,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390093.947, "dur": 0.530, + "args": { + "External id": 2947452,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390094.815, "dur": 1.463, + "args": { + "External id": 2947453,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390095.697, "dur": 0.516, + "args": { + "External id": 2947454,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390096.524, "dur": 1.459, + "args": { + "External id": 2947455,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390097.303, "dur": 0.617, + "args": { + "External id": 2947456,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390098.210, "dur": 2.736, + "args": { + "External id": 2947457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390100.367, "dur": 0.506, + "args": { + "External id": 2947458,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336755, "tid": 1336755, + "ts": 1555017390101.184, "dur": 2.429, + "args": { + "External id": 2947459,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390102.983, "dur": 0.567, + "args": { + "External id": 2947460,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 1336755, "tid": 1336755, + "ts": 1555017390124.177, "dur": 140.515, + "args": { + "External id": 2947461,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 1336755, "tid": 1336755, + "ts": 1555017390363.788, "dur": 131.148, + "args": { + "External id": 2947462,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[291], [], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 1336755, "tid": 1336755, + "ts": 1555017390422.091, "dur": 51.790, + "args": { + "External id": 2947463,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[291], [], [], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336755, "tid": 1336755, + "ts": 1555017390437.541, "dur": 1.145, + "args": { + "External id": 2947464,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 1336755, "tid": 1336755, + "ts": 1555017390886.374, "dur": 1017.622, + "args": { + "External id": 2947465,"Sequence number": 29294796, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 1336755, "tid": 1336755, + "ts": 1555017390936.576, "dur": 135.168, + "args": { + "External id": 2947466,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017390940.301, "dur": 1.379, + "args": { + "External id": 2947467,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017391022.898, "dur": 1.437, + "args": { + "External id": 2947468,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 1336755, "tid": 1336755, + "ts": 1555017391098.133, "dur": 508.261, + "args": { + "External id": 2947469,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336755, "tid": 1336755, + "ts": 1555017391102.183, "dur": 61.679, + "args": { + "External id": 2947470,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336755, "tid": 1336755, + "ts": 1555017391105.583, "dur": 9.685, + "args": { + "External id": 2947471,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555017391110.875, "dur": 3.663, + "args": { + "External id": 2947472,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336755, "tid": 1336755, + "ts": 1555017391116.811, "dur": 46.209, + "args": { + "External id": 2947473,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 1336755, "tid": 1336755, + "ts": 1555017391173.286, "dur": 429.935, + "args": { + "External id": 2947474,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555017391204.383, "dur": 391.502, + "args": { + "External id": 2947475,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11306, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 1336755, "tid": 1336755, + "ts": 1555017391221.231, "dur": 368.624, + "args": { + "External id": 2947476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 1336755, "tid": 1336755, + "ts": 1555017391668.669, "dur": 197.813, + "args": { + "External id": 2947477,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 1336755, "tid": 1336755, + "ts": 1555017391756.712, "dur": 30.854, + "args": { + "External id": 2947478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336755, "tid": 1336755, + "ts": 1555017391774.513, "dur": 4.298, + "args": { + "External id": 2947479,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11310, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 1336755, "tid": 1336755, + "ts": 1555017391816.550, "dur": 43.764, + "args": { + "External id": 2947480,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017391819.139, "dur": 1.120, + "args": { + "External id": 2947481,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017391821.608, "dur": 0.447, + "args": { + "External id": 2947482,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 1336755, "tid": 1336755, + "ts": 1555017391919.680, "dur": 20.663, + "args": { + "External id": 2947483,"Sequence number": 29294797, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336755, "tid": 1336755, + "ts": 1555017391928.873, "dur": 8.256, + "args": { + "External id": 2947484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336755, "tid": 1336755, + "ts": 1555017391931.303, "dur": 5.638, + "args": { + "External id": 2947485,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336755, "tid": 1336755, + "ts": 1555017392366.329, "dur": 40.179, + "args": { + "External id": 2947486,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 1336755, "tid": 1336755, + "ts": 1555017392416.962, "dur": 20.799, + "args": { + "External id": 2947487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 1336755, "tid": 1336755, + "ts": 1555017392444.699, "dur": 23.304, + "args": { + "External id": 2947488,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 1336755, "tid": 1336755, + "ts": 1555017392479.859, "dur": 21.986, + "args": { + "External id": 2947489,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017392482.823, "dur": 0.517, + "args": { + "External id": 2947490,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336755, "tid": 1336755, + "ts": 1555017392517.674, "dur": 0.705, + "args": { + "External id": 2947491,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 11322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 1336755, "tid": 1336755, + "ts": 1555017392647.562, "dur": 740.726, + "args": { + "External id": 2947492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 1336755, "tid": 1336755, + "ts": 1555017393189.914, "dur": 166.467, + "args": { + "External id": 2947493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 1336755, "tid": 1336755, + "ts": 1555017393433.637, "dur": 29.450, + "args": { + "External id": 2947494,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336755, "tid": 1336755, + "ts": 1555017393437.325, "dur": 25.127, + "args": { + "External id": 2947495,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336755, "tid": 1336755, + "ts": 1555017393467.609, "dur": 1383.355, + "args": { + "External id": 2947496,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1336755, + "ts": 1555017393469.407, "dur": 1381.316, + "args": { + "External id": 2947497,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1336755, + "ts": 1555017393471.186, "dur": 1378.643, + "args": { + "External id": 2947498,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 1336755, "tid": 1336755, + "ts": 1555017394857.832, "dur": 64.728, + "args": { + "External id": 2947499,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555017394860.879, "dur": 36.692, + "args": { + "External id": 2947500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336755, "tid": 1336755, + "ts": 1555017394867.789, "dur": 3.373, + "args": { + "External id": 2947501,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336755, "tid": 1336755, + "ts": 1555017394872.967, "dur": 24.251, + "args": { + "External id": 2947502,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336755, "tid": 1336755, + "ts": 1555017394879.850, "dur": 2.681, + "args": { + "External id": 2947503,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336755, "tid": 1336755, + "ts": 1555017394899.353, "dur": 22.305, + "args": { + "External id": 2947504,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336755, "tid": 1336755, + "ts": 1555017394925.003, "dur": 37.524, + "args": { + "External id": 2947505,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336755, "tid": 1336755, + "ts": 1555017394926.119, "dur": 36.262, + "args": { + "External id": 2947506,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336755, "tid": 1336755, + "ts": 1555017394926.893, "dur": 35.122, + "args": { + "External id": 2947507,"Sequence number": 29294798, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 1336755, "tid": 1336755, + "ts": 1555017395024.394, "dur": 5537.212, + "args": { + "External id": 2947508,"Record function id": 0, "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 1336755, "tid": 1336755, + "ts": 1555017395059.679, "dur": 5481.332, + "args": { + "External id": 2947509,"Record function id": 0, "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 1336755, "tid": 1336755, + "ts": 1555017396254.679, "dur": 268.685, + "args": { + "External id": 2947510,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396274.145, "dur": 1.073, + "args": { + "External id": 2947511,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396276.121, "dur": 0.235, + "args": { + "External id": 2947512,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396276.938, "dur": 0.220, + "args": { + "External id": 2947513,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396277.590, "dur": 0.215, + "args": { + "External id": 2947514,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396278.306, "dur": 0.201, + "args": { + "External id": 2947515,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396278.891, "dur": 0.247, + "args": { + "External id": 2947516,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396279.523, "dur": 0.066, + "args": { + "External id": 2947517,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396279.958, "dur": 0.116, + "args": { + "External id": 2947518,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396280.593, "dur": 0.203, + "args": { + "External id": 2947519,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396281.452, "dur": 0.079, + "args": { + "External id": 2947520,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396281.983, "dur": 0.087, + "args": { + "External id": 2947521,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396282.500, "dur": 0.095, + "args": { + "External id": 2947522,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396282.989, "dur": 0.205, + "args": { + "External id": 2947523,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396283.519, "dur": 0.208, + "args": { + "External id": 2947524,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396284.251, "dur": 0.068, + "args": { + "External id": 2947525,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396284.595, "dur": 0.197, + "args": { + "External id": 2947526,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396285.245, "dur": 0.076, + "args": { + "External id": 2947527,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396285.905, "dur": 0.197, + "args": { + "External id": 2947528,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396286.607, "dur": 0.073, + "args": { + "External id": 2947529,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396286.967, "dur": 0.067, + "args": { + "External id": 2947530,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396287.511, "dur": 0.068, + "args": { + "External id": 2947531,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396287.891, "dur": 0.072, + "args": { + "External id": 2947532,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396288.362, "dur": 0.069, + "args": { + "External id": 2947533,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396288.780, "dur": 0.069, + "args": { + "External id": 2947534,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396289.200, "dur": 0.071, + "args": { + "External id": 2947535,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396289.721, "dur": 0.065, + "args": { + "External id": 2947536,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396290.232, "dur": 0.063, + "args": { + "External id": 2947537,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396290.714, "dur": 0.053, + "args": { + "External id": 2947538,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396291.292, "dur": 0.072, + "args": { + "External id": 2947539,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396291.805, "dur": 0.073, + "args": { + "External id": 2947540,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396292.230, "dur": 0.066, + "args": { + "External id": 2947541,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396292.627, "dur": 0.069, + "args": { + "External id": 2947542,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396293.103, "dur": 0.068, + "args": { + "External id": 2947543,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396293.570, "dur": 0.068, + "args": { + "External id": 2947544,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396294.199, "dur": 0.069, + "args": { + "External id": 2947545,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396294.874, "dur": 0.054, + "args": { + "External id": 2947546,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396295.324, "dur": 0.073, + "args": { + "External id": 2947547,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396295.851, "dur": 0.052, + "args": { + "External id": 2947548,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396296.326, "dur": 0.071, + "args": { + "External id": 2947549,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396296.807, "dur": 0.085, + "args": { + "External id": 2947550,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396297.439, "dur": 0.083, + "args": { + "External id": 2947551,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396298.070, "dur": 0.191, + "args": { + "External id": 2947552,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396298.706, "dur": 0.224, + "args": { + "External id": 2947553,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396299.383, "dur": 0.107, + "args": { + "External id": 2947554,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396299.877, "dur": 0.209, + "args": { + "External id": 2947555,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396300.478, "dur": 0.100, + "args": { + "External id": 2947556,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396301.046, "dur": 0.070, + "args": { + "External id": 2947557,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396301.471, "dur": 0.080, + "args": { + "External id": 2947558,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396301.883, "dur": 0.076, + "args": { + "External id": 2947559,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396302.326, "dur": 0.214, + "args": { + "External id": 2947560,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396302.971, "dur": 0.068, + "args": { + "External id": 2947561,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396303.559, "dur": 0.063, + "args": { + "External id": 2947562,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396304.005, "dur": 0.062, + "args": { + "External id": 2947563,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396304.467, "dur": 0.056, + "args": { + "External id": 2947564,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396304.930, "dur": 0.053, + "args": { + "External id": 2947565,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396305.392, "dur": 0.064, + "args": { + "External id": 2947566,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396305.799, "dur": 0.066, + "args": { + "External id": 2947567,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396306.250, "dur": 0.054, + "args": { + "External id": 2947568,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396306.638, "dur": 0.065, + "args": { + "External id": 2947569,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396307.065, "dur": 0.071, + "args": { + "External id": 2947570,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396307.651, "dur": 0.074, + "args": { + "External id": 2947571,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396308.095, "dur": 0.073, + "args": { + "External id": 2947572,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396308.540, "dur": 0.067, + "args": { + "External id": 2947573,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396308.926, "dur": 0.070, + "args": { + "External id": 2947574,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396309.455, "dur": 0.068, + "args": { + "External id": 2947575,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396310.020, "dur": 0.065, + "args": { + "External id": 2947576,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396310.560, "dur": 0.068, + "args": { + "External id": 2947577,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396311.100, "dur": 0.069, + "args": { + "External id": 2947578,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396311.503, "dur": 0.321, + "args": { + "External id": 2947579,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396312.323, "dur": 0.213, + "args": { + "External id": 2947580,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396312.974, "dur": 0.068, + "args": { + "External id": 2947581,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396313.419, "dur": 0.068, + "args": { + "External id": 2947582,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396314.087, "dur": 0.069, + "args": { + "External id": 2947583,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396314.516, "dur": 0.068, + "args": { + "External id": 2947584,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396337.888, "dur": 0.061, + "args": { + "External id": 2947585,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396338.582, "dur": 0.052, + "args": { + "External id": 2947586,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396339.033, "dur": 0.069, + "args": { + "External id": 2947587,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396339.426, "dur": 0.066, + "args": { + "External id": 2947588,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396339.818, "dur": 0.070, + "args": { + "External id": 2947589,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396340.146, "dur": 0.064, + "args": { + "External id": 2947590,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396340.579, "dur": 0.070, + "args": { + "External id": 2947591,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396341.000, "dur": 0.067, + "args": { + "External id": 2947592,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396341.534, "dur": 0.068, + "args": { + "External id": 2947593,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396341.994, "dur": 0.071, + "args": { + "External id": 2947594,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396342.550, "dur": 0.070, + "args": { + "External id": 2947595,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396343.134, "dur": 0.091, + "args": { + "External id": 2947596,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396343.691, "dur": 0.056, + "args": { + "External id": 2947597,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396344.083, "dur": 0.071, + "args": { + "External id": 2947598,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396344.478, "dur": 0.075, + "args": { + "External id": 2947599,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396344.898, "dur": 0.065, + "args": { + "External id": 2947600,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396345.314, "dur": 0.062, + "args": { + "External id": 2947601,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396345.744, "dur": 0.069, + "args": { + "External id": 2947602,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396346.215, "dur": 0.068, + "args": { + "External id": 2947603,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396346.615, "dur": 0.060, + "args": { + "External id": 2947604,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396347.214, "dur": 0.063, + "args": { + "External id": 2947605,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396347.698, "dur": 0.067, + "args": { + "External id": 2947606,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396348.111, "dur": 0.067, + "args": { + "External id": 2947607,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396348.522, "dur": 0.070, + "args": { + "External id": 2947608,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396349.071, "dur": 0.065, + "args": { + "External id": 2947609,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396349.431, "dur": 0.068, + "args": { + "External id": 2947610,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396349.821, "dur": 0.067, + "args": { + "External id": 2947611,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396350.218, "dur": 0.062, + "args": { + "External id": 2947612,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396350.620, "dur": 0.064, + "args": { + "External id": 2947613,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396351.204, "dur": 0.065, + "args": { + "External id": 2947614,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396351.711, "dur": 0.092, + "args": { + "External id": 2947615,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396352.121, "dur": 0.065, + "args": { + "External id": 2947616,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396352.461, "dur": 0.059, + "args": { + "External id": 2947617,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396352.865, "dur": 0.064, + "args": { + "External id": 2947618,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396353.438, "dur": 0.051, + "args": { + "External id": 2947619,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396353.862, "dur": 0.060, + "args": { + "External id": 2947620,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396354.309, "dur": 0.065, + "args": { + "External id": 2947621,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396354.887, "dur": 0.063, + "args": { + "External id": 2947622,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396355.560, "dur": 0.065, + "args": { + "External id": 2947623,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396356.099, "dur": 0.066, + "args": { + "External id": 2947624,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396356.649, "dur": 0.066, + "args": { + "External id": 2947625,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396357.200, "dur": 0.068, + "args": { + "External id": 2947626,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396357.733, "dur": 0.067, + "args": { + "External id": 2947627,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396358.273, "dur": 0.066, + "args": { + "External id": 2947628,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396358.672, "dur": 0.068, + "args": { + "External id": 2947629,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396359.168, "dur": 0.068, + "args": { + "External id": 2947630,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396359.591, "dur": 0.069, + "args": { + "External id": 2947631,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396360.070, "dur": 0.068, + "args": { + "External id": 2947632,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396360.513, "dur": 0.065, + "args": { + "External id": 2947633,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396361.047, "dur": 0.066, + "args": { + "External id": 2947634,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396361.666, "dur": 0.059, + "args": { + "External id": 2947635,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396362.226, "dur": 0.049, + "args": { + "External id": 2947636,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396362.596, "dur": 0.064, + "args": { + "External id": 2947637,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396368.180, "dur": 0.092, + "args": { + "External id": 2947638,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396368.679, "dur": 0.074, + "args": { + "External id": 2947639,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396369.225, "dur": 0.068, + "args": { + "External id": 2947640,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396369.662, "dur": 0.069, + "args": { + "External id": 2947641,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396370.208, "dur": 0.065, + "args": { + "External id": 2947642,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396370.592, "dur": 0.063, + "args": { + "External id": 2947643,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396370.985, "dur": 0.068, + "args": { + "External id": 2947644,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396371.416, "dur": 0.070, + "args": { + "External id": 2947645,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396372.101, "dur": 0.067, + "args": { + "External id": 2947646,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396372.539, "dur": 0.057, + "args": { + "External id": 2947647,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396372.981, "dur": 0.070, + "args": { + "External id": 2947648,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396373.420, "dur": 0.069, + "args": { + "External id": 2947649,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396373.773, "dur": 0.065, + "args": { + "External id": 2947650,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396374.279, "dur": 0.069, + "args": { + "External id": 2947651,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396374.826, "dur": 0.053, + "args": { + "External id": 2947652,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396375.175, "dur": 0.066, + "args": { + "External id": 2947653,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396375.695, "dur": 0.065, + "args": { + "External id": 2947654,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396376.079, "dur": 0.067, + "args": { + "External id": 2947655,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396376.662, "dur": 0.067, + "args": { + "External id": 2947656,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396377.372, "dur": 0.068, + "args": { + "External id": 2947657,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396377.724, "dur": 0.068, + "args": { + "External id": 2947658,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396378.051, "dur": 0.064, + "args": { + "External id": 2947659,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396378.469, "dur": 0.066, + "args": { + "External id": 2947660,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396378.878, "dur": 0.066, + "args": { + "External id": 2947661,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396379.464, "dur": 0.069, + "args": { + "External id": 2947662,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396380.007, "dur": 0.069, + "args": { + "External id": 2947663,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396380.409, "dur": 0.065, + "args": { + "External id": 2947664,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396380.846, "dur": 0.067, + "args": { + "External id": 2947665,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396381.206, "dur": 0.063, + "args": { + "External id": 2947666,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396381.771, "dur": 0.068, + "args": { + "External id": 2947667,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396382.384, "dur": 0.067, + "args": { + "External id": 2947668,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396382.857, "dur": 0.068, + "args": { + "External id": 2947669,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396383.287, "dur": 0.064, + "args": { + "External id": 2947670,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396383.700, "dur": 0.092, + "args": { + "External id": 2947671,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396384.124, "dur": 0.066, + "args": { + "External id": 2947672,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396384.651, "dur": 0.067, + "args": { + "External id": 2947673,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396385.165, "dur": 0.065, + "args": { + "External id": 2947674,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396385.657, "dur": 0.049, + "args": { + "External id": 2947675,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396386.050, "dur": 0.069, + "args": { + "External id": 2947676,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396386.439, "dur": 0.069, + "args": { + "External id": 2947677,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396386.958, "dur": 0.071, + "args": { + "External id": 2947678,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396387.473, "dur": 0.065, + "args": { + "External id": 2947679,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396387.935, "dur": 0.087, + "args": { + "External id": 2947680,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396388.386, "dur": 0.063, + "args": { + "External id": 2947681,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396388.925, "dur": 0.067, + "args": { + "External id": 2947682,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396389.338, "dur": 0.068, + "args": { + "External id": 2947683,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396389.838, "dur": 0.065, + "args": { + "External id": 2947684,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396390.226, "dur": 0.065, + "args": { + "External id": 2947685,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396390.732, "dur": 0.067, + "args": { + "External id": 2947686,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396391.134, "dur": 0.070, + "args": { + "External id": 2947687,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396391.575, "dur": 0.067, + "args": { + "External id": 2947688,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396392.133, "dur": 0.069, + "args": { + "External id": 2947689,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396392.656, "dur": 0.063, + "args": { + "External id": 2947690,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396393.194, "dur": 0.072, + "args": { + "External id": 2947691,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396393.599, "dur": 0.064, + "args": { + "External id": 2947692,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396394.231, "dur": 0.067, + "args": { + "External id": 2947693,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396394.779, "dur": 0.067, + "args": { + "External id": 2947694,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396395.209, "dur": 0.071, + "args": { + "External id": 2947695,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396395.767, "dur": 0.070, + "args": { + "External id": 2947696,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396396.188, "dur": 0.068, + "args": { + "External id": 2947697,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396396.558, "dur": 0.066, + "args": { + "External id": 2947698,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396397.226, "dur": 0.071, + "args": { + "External id": 2947699,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396397.600, "dur": 0.065, + "args": { + "External id": 2947700,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396398.069, "dur": 0.055, + "args": { + "External id": 2947701,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396398.564, "dur": 0.067, + "args": { + "External id": 2947702,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396399.289, "dur": 0.066, + "args": { + "External id": 2947703,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396399.747, "dur": 0.065, + "args": { + "External id": 2947704,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396400.294, "dur": 0.064, + "args": { + "External id": 2947705,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396400.750, "dur": 0.067, + "args": { + "External id": 2947706,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396401.211, "dur": 0.070, + "args": { + "External id": 2947707,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396401.759, "dur": 0.065, + "args": { + "External id": 2947708,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396402.308, "dur": 0.053, + "args": { + "External id": 2947709,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396402.710, "dur": 0.067, + "args": { + "External id": 2947710,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396403.093, "dur": 0.069, + "args": { + "External id": 2947711,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396403.471, "dur": 0.066, + "args": { + "External id": 2947712,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396403.908, "dur": 0.068, + "args": { + "External id": 2947713,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396404.364, "dur": 0.072, + "args": { + "External id": 2947714,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396404.827, "dur": 0.068, + "args": { + "External id": 2947715,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396405.144, "dur": 0.058, + "args": { + "External id": 2947716,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396405.678, "dur": 0.069, + "args": { + "External id": 2947717,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396406.108, "dur": 0.067, + "args": { + "External id": 2947718,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396406.440, "dur": 0.069, + "args": { + "External id": 2947719,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396407.010, "dur": 0.055, + "args": { + "External id": 2947720,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396407.606, "dur": 0.055, + "args": { + "External id": 2947721,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396407.969, "dur": 0.068, + "args": { + "External id": 2947722,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396408.467, "dur": 0.070, + "args": { + "External id": 2947723,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396409.068, "dur": 0.052, + "args": { + "External id": 2947724,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396409.559, "dur": 0.068, + "args": { + "External id": 2947725,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396410.119, "dur": 0.071, + "args": { + "External id": 2947726,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396410.564, "dur": 0.071, + "args": { + "External id": 2947727,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396410.953, "dur": 0.063, + "args": { + "External id": 2947728,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396411.348, "dur": 0.064, + "args": { + "External id": 2947729,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396411.860, "dur": 0.068, + "args": { + "External id": 2947730,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396412.215, "dur": 0.067, + "args": { + "External id": 2947731,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396412.673, "dur": 0.069, + "args": { + "External id": 2947732,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396413.006, "dur": 0.064, + "args": { + "External id": 2947733,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396413.422, "dur": 0.066, + "args": { + "External id": 2947734,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396413.770, "dur": 0.067, + "args": { + "External id": 2947735,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396414.153, "dur": 0.063, + "args": { + "External id": 2947736,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396414.709, "dur": 0.060, + "args": { + "External id": 2947737,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396415.328, "dur": 0.065, + "args": { + "External id": 2947738,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396415.753, "dur": 0.050, + "args": { + "External id": 2947739,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396416.410, "dur": 0.069, + "args": { + "External id": 2947740,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396416.746, "dur": 0.052, + "args": { + "External id": 2947741,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396417.443, "dur": 0.067, + "args": { + "External id": 2947742,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396417.764, "dur": 0.052, + "args": { + "External id": 2947743,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396418.386, "dur": 0.069, + "args": { + "External id": 2947744,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396418.710, "dur": 0.056, + "args": { + "External id": 2947745,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396419.197, "dur": 0.070, + "args": { + "External id": 2947746,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396419.520, "dur": 0.050, + "args": { + "External id": 2947747,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396420.075, "dur": 0.070, + "args": { + "External id": 2947748,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396420.403, "dur": 0.052, + "args": { + "External id": 2947749,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396421.075, "dur": 0.055, + "args": { + "External id": 2947750,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396421.390, "dur": 0.055, + "args": { + "External id": 2947751,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396421.862, "dur": 0.070, + "args": { + "External id": 2947752,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396422.183, "dur": 0.055, + "args": { + "External id": 2947753,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396422.726, "dur": 0.067, + "args": { + "External id": 2947754,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396423.056, "dur": 0.053, + "args": { + "External id": 2947755,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396423.553, "dur": 0.064, + "args": { + "External id": 2947756,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396423.877, "dur": 0.053, + "args": { + "External id": 2947757,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396424.602, "dur": 0.067, + "args": { + "External id": 2947758,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396424.926, "dur": 0.054, + "args": { + "External id": 2947759,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396425.558, "dur": 0.067, + "args": { + "External id": 2947760,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396425.885, "dur": 0.054, + "args": { + "External id": 2947761,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396426.641, "dur": 0.067, + "args": { + "External id": 2947762,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396426.963, "dur": 0.051, + "args": { + "External id": 2947763,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396427.595, "dur": 0.066, + "args": { + "External id": 2947764,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396427.918, "dur": 0.048, + "args": { + "External id": 2947765,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396428.566, "dur": 0.069, + "args": { + "External id": 2947766,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396428.890, "dur": 0.053, + "args": { + "External id": 2947767,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396429.451, "dur": 0.065, + "args": { + "External id": 2947768,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396429.768, "dur": 0.051, + "args": { + "External id": 2947769,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396430.435, "dur": 0.067, + "args": { + "External id": 2947770,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396430.757, "dur": 0.063, + "args": { + "External id": 2947771,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396431.413, "dur": 0.071, + "args": { + "External id": 2947772,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396431.736, "dur": 0.080, + "args": { + "External id": 2947773,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396432.349, "dur": 0.067, + "args": { + "External id": 2947774,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396432.673, "dur": 0.049, + "args": { + "External id": 2947775,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396433.349, "dur": 0.050, + "args": { + "External id": 2947776,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396433.652, "dur": 0.057, + "args": { + "External id": 2947777,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396434.219, "dur": 0.070, + "args": { + "External id": 2947778,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396434.545, "dur": 0.059, + "args": { + "External id": 2947779,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396435.038, "dur": 0.068, + "args": { + "External id": 2947780,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396435.361, "dur": 0.051, + "args": { + "External id": 2947781,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396435.986, "dur": 0.066, + "args": { + "External id": 2947782,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396436.312, "dur": 0.057, + "args": { + "External id": 2947783,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396436.930, "dur": 0.071, + "args": { + "External id": 2947784,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396437.262, "dur": 0.053, + "args": { + "External id": 2947785,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396437.806, "dur": 0.067, + "args": { + "External id": 2947786,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396438.128, "dur": 0.051, + "args": { + "External id": 2947787,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396438.835, "dur": 0.056, + "args": { + "External id": 2947788,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396439.149, "dur": 0.051, + "args": { + "External id": 2947789,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396439.638, "dur": 0.067, + "args": { + "External id": 2947790,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396439.958, "dur": 0.051, + "args": { + "External id": 2947791,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396440.487, "dur": 0.071, + "args": { + "External id": 2947792,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396440.811, "dur": 0.051, + "args": { + "External id": 2947793,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396441.551, "dur": 0.057, + "args": { + "External id": 2947794,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396441.862, "dur": 0.055, + "args": { + "External id": 2947795,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396442.349, "dur": 0.068, + "args": { + "External id": 2947796,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396442.670, "dur": 0.052, + "args": { + "External id": 2947797,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396443.333, "dur": 0.064, + "args": { + "External id": 2947798,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396443.650, "dur": 0.051, + "args": { + "External id": 2947799,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396444.179, "dur": 0.069, + "args": { + "External id": 2947800,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336755, "tid": 1336755, + "ts": 1555017396444.502, "dur": 0.054, + "args": { + "External id": 2947801,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 1336755, "tid": 1336755, + "ts": 1555017397022.617, "dur": 3428.154, + "args": { + "External id": 2947802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "8.0759099431723818e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 1336755, "tid": 1336755, + "ts": 1555017399935.512, "dur": 359.579, + "args": { + "External id": 2947803,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "8.0759099431723818e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11634 + } + }, + { + "name": "process_name", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 0, + "args": { + "sort_index": 1336755 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 1381158, + "args": { + "name": "thread 1381158 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 1381158, + "args": { + "sort_index": 1381158 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 1381158, + "args": { + "name": "thread 1381158 (pt_autograd_2)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 1381158, + "args": { + "sort_index": 1381158 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 1336755, + "args": { + "name": "thread 1336755 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 1555015267527.841, "pid": 1336755, "tid": 1336755, + "args": { + "sort_index": 1336755 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 1555015267461.734, "dur": 2134711.382, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 1555015267461.734, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1555015267461.734 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 1555017466014.211 + } + ], + "traceName": "exp/mtp.1B.batch16.seqlen4096.context4096.warmup2000.update1.steps200000.lr2e-4.cosine/profile_trace/iteration_121856/rank2_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file