Call Stack not being recorded for torch_tb_profiler
jeromeku opened this issue · comments
jeromeku commented
Running the example does not record stack traces, even though `with_stack` is set to 1.
Here is my env:
tensorboard 2.13.0
torch 2.0.1
torch-tb-profiler 0.4.1
Here is an excerpt from the tensorboard trace:
{
"schemaVersion": 1,
"deviceProperties": [
{
"id": 0, "name": "NVIDIA RTX A6000", "totalGlobalMem": 51041271808,
"computeMajor": 8, "computeMinor": 6,
"maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 1536,
"regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32,
"sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 102400,
"numSms": 84, "sharedMemPerBlockOptin": 101376
}
],
"record_shapes": 1,
"with_stack": 1,
"profile_memory": 1,
"traceEvents": [
{
"ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: NllLossBackward0", "pid": 18462, "tid": 18549,
"ts": 1688618105515688, "dur": 176,
"args": {
"External id": 1025,"Ev Idx": 0, "Fwd thread id": 1, "Sequence number": 533
}
},
{
"ph": "X", "cat": "cpu_op", "name": "NllLossBackward0", "pid": 18462, "tid": 18549,
"ts": 1688618105515717, "dur": 136,
"args": {
"External id": 1026,"Ev Idx": 1, "Input Dims": [[]], "Input type": ["float"], "Fwd thread id": 1, "Sequence number": 533
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::nll_loss_backward", "pid": 18462, "tid": 18549,
"ts": 1688618105515737, "dur": 115,
"args": {
"External id": 1027,"Ev Idx": 2, "Input Dims": [[], [32, 1000], [32], [], [], [], []], "Input type": ["float", "float", "long int", "", "Scalar", "Scalar", "float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 18462, "tid": 18549,
"ts": 1688618105515766, "dur": 70,
"args": {
"External id": 1028,"Ev Idx": 3, "Input Dims": [[32, 1000]], "Input type": ["float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 18462, "tid": 18549,
"ts": 1688618105515771, "dur": 62,
"args": {
"External id": 1029,"Ev Idx": 4, "Input Dims": [[32, 1000], []], "Input type": ["float", "Scalar"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LogSoftmaxBackward0", "pid": 18462, "tid": 18549,
"ts": 1688618105515875, "dur": 48,
"args": {
"External id": 1030,"Ev Idx": 5, "Fwd thread id": 1, "Sequence number": 532
}
},
{
"ph": "X", "cat": "cpu_op", "name": "LogSoftmaxBackward0", "pid": 18462, "tid": 18549,
"ts": 1688618105515877, "dur": 40,
"args": {
"External id": 1031,"Ev Idx": 6, "Input Dims": [[32, 1000]], "Input type": ["float"], "Fwd thread id": 1, "Sequence number": 532
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::_log_softmax_backward_data", "pid": 18462, "tid": 18549,
"ts": 1688618105515885, "dur": 31,
"args": {
"External id": 1032,"Ev Idx": 7, "Input Dims": [[32, 1000], [32, 1000], [], []], "Input type": ["float", "float", "Scalar", "Scalar"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddmmBackward0", "pid": 18462, "tid": 18549,
"ts": 1688618105515930, "dur": 247,
"args": {
"External id": 1033,"Ev Idx": 8, "Fwd thread id": 1, "Sequence number": 531
}
},
{
"ph": "X", "cat": "cpu_op", "name": "AddmmBackward0", "pid": 18462, "tid": 18549,
"ts": 1688618105515932, "dur": 177,
"args": {
"External id": 1034,"Ev Idx": 9, "Input Dims": [[32, 1000]], "Input type": ["float"], "Fwd thread id": 1, "Sequence number": 531
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 18462, "tid": 18549,
"ts": 1688618105515937, "dur": 18,
"args": {
"External id": 1035,"Ev Idx": 10, "Input Dims": [[2048, 1000]], "Input type": ["float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 18462, "tid": 18549,
"ts": 1688618105515942, "dur": 11,
"args": {
"External id": 1036,"Ev Idx": 11, "Input Dims": [[2048, 1000], [], []], "Input type": ["float", "Scalar", "Scalar"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 18462, "tid": 18549,
"ts": 1688618105515950, "dur": 2,
"args": {
"External id": 1037,"Ev Idx": 12, "Input Dims": [[2048, 1000], [], [], []], "Input type": ["float", "", "", ""]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 18462, "tid": 18549,
"ts": 1688618105515957, "dur": 90,
"args": {
"External id": 1038,"Ev Idx": 13, "Input Dims": [[32, 1000], [1000, 2048]], "Input type": ["float", "float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 18462, "tid": 18549,
"ts": 1688618105516050, "dur": 7,
"args": {
"External id": 1039,"Ev Idx": 14, "Input Dims": [[32, 1000]], "Input type": ["float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 18462, "tid": 18549,
"ts": 1688618105516052, "dur": 4,
"args": {
"External id": 1040,"Ev Idx": 15, "Input Dims": [[32, 1000], [], []], "Input type": ["float", "Scalar", "Scalar"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 18462, "tid": 18549,
"ts": 1688618105516054, "dur": 2,
"args": {
"External id": 1041,"Ev Idx": 16, "Input Dims": [[32, 1000], [], [], []], "Input type": ["float", "", "", ""]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 18462, "tid": 18549,
"ts": 1688618105516059, "dur": 43,
"args": {
"External id": 1042,"Ev Idx": 17, "Input Dims": [[1000, 32], [32, 2048]], "Input type": ["float", "float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 18462, "tid": 18549,
"ts": 1688618105516104, "dur": 4,
"args": {
"External id": 1043,"Ev Idx": 18, "Input Dims": [[1000, 2048]], "Input type": ["float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 18462, "tid": 18549,
"ts": 1688618105516105, "dur": 2,
"args": {
"External id": 1044,"Ev Idx": 19, "Input Dims": [[1000, 2048], [], []], "Input type": ["float", "Scalar", "Scalar"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 18462, "tid": 18549,
"ts": 1688618105516106, "dur": 1,
"args": {
"External id": 1045,"Ev Idx": 20, "Input Dims": [[1000, 2048], [], [], []], "Input type": ["float", "", "", ""]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 18462, "tid": 18549,
"ts": 1688618105516113, "dur": 41,
"args": {
"External id": 1046,"Ev Idx": 21, "Input Dims": [[32, 1000], [], [], []], "Input type": ["float", "", "Scalar", ""]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 18462, "tid": 18549,
"ts": 1688618105516156, "dur": 4,
"args": {
"External id": 1047,"Ev Idx": 22, "Input Dims": [[1, 1000], []], "Input type": ["float", ""]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 18462, "tid": 18549,
"ts": 1688618105516192, "dur": 18,
"args": {
"External id": 1048,"Ev Idx": 23
}
},
{
"ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 18462, "tid": 18549,
"ts": 1688618105516195, "dur": 11,
"args": {
"External id": 1049,"Ev Idx": 24, "Input Dims": [[1000]], "Input type": ["float"]
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 18462, "tid": 18549,
"ts": 1688618105516200, "dur": 6,
"args": {
"External id": 1050,"Ev Idx": 25, "Input Dims": [[1000]], "Input type": ["float"]
}
},
In the traces in the samples folder, there is a call stack entry for each operator, but there is no such entry in the trace above.
The Operator tab in TensorBoard also shows that the "View Call Stack" links are all greyed out.