cuOSD Error: cudaErrorMisalignedAddress
NishinoLangley opened this issue · comments
Hi, thank you for your excellent work.
I have made some changes in libraries/cuOSD/test/main.cpp for testing frequent cuosd_draw_text operations. However, an error occurred during program execution.
Code:
// Reproduction case for the reported cudaErrorMisalignedAddress failure.
// Runs 10000 iterations; each iteration creates a non-blocking CUDA stream, a
// cuOSD context and a 3840x2160 PitchLinearNV12 image, queues one text draw,
// applies it on the stream, and then tears everything down again.
// Always returns 0.
// NOTE(review): the maintainer reply later in this thread states that
// cuosd_apply is asynchronous; this version destroys the context, the stream
// and the image right after calling it, without synchronizing the stream first.
static int simple_test() {
int i;
for (i = 0; i < 10000; ++i){
// A fresh stream is created (and destroyed below) on every iteration.
cudaStream_t stream = nullptr;
checkRuntime(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
printf("Working......%d\n", i);
// A fresh cuOSD context and image are also allocated per iteration.
auto context = cuosd_context_create();
gpu::Image* image = gpu::create_image(3840, 2160, gpu::ImageFormat::PitchLinearNV12);
gpu::set_color(image, 255, 255, 255, 255, stream);
gpu::copy_yuvnv12_to(image, 0, 0, 3840, 2160, "data/image/nv12_3840x2160.yuv", 3840, 2160, 180, stream);
// Queue a multi-line text draw on the context; the two cuOSDColor values
// are presumably the text colour and the background colour — TODO confirm
// against the cuOSD header.
cuosd_draw_text(context,
"Shakespearean quotes:\n"
"\n"
"Words cannot express true love, loyalty behavior is the best explanation.\n"
, 13, "data/simhei.ttf", 10, 10, cuOSDColor{0, 255, 0, 255}, cuOSDColor{60, 60, 60, 200});
// Apply the queued draw commands to the image on `stream`.
cuosd_apply(context, image, stream);
// Teardown happens immediately; no cudaStreamSynchronize(stream) precedes it.
cuosd_context_destroy(context);
checkRuntime(cudaStreamDestroy(stream));
gpu::free_image(image);
}
return 0;
}
// Entry point of the test program: dispatches to the sub-command named by
// argv[1] ("perf", "simple", "segment", "segment2", "polyline", "comp",
// "simple2"). Any other command, or a missing command, prints the help text.
//
// Returns the selected sub-command's return value, or 0 after printing help.
int main(int argc, char **argv)
{
    if (argc < 2) {
        help();
        // help() may terminate the process itself (not visible from here). If
        // it returns, we must not fall through: argv[1] is a null pointer when
        // no command was given, and passing it to strcmp is undefined behaviour.
        return 0;
    }

    const char* cmd = argv[1];
    if (strcmp(cmd, "perf") == 0) {
        return perf(argc, argv);
    } else if (strcmp(cmd, "simple") == 0) {
        return simple_draw();
    } else if (strcmp(cmd, "segment") == 0) {
        return segment();
    } else if (strcmp(cmd, "segment2") == 0) {
        return segment2();
    } else if (strcmp(cmd, "polyline") == 0) {
        return polyline();
    } else if (strcmp(cmd, "comp") == 0) {
        return comp(argc, argv);
    } else if (strcmp(cmd, "simple2") == 0) {
        return simple_test();
    }

    // Unknown command: show usage. Exit status stays 0, as before.
    help();
    return 0;
}
Except for the modifications mentioned above, no changes have been made.
Error:
Working......7790
Working......7791
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaStreamDestroy(stream) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/main.cpp:248
CUDA Runtime error cudaFree(image->data0) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:563
CUDA Runtime error cudaFree(image->data1) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:564
CUDA Runtime error cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/main.cpp:229
Working......7792
CUDA Runtime error cudaMalloc(&output->data0, output->stride * output->height) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:103
CUDA Runtime error cudaMalloc(&output->data1, output->stride * output->height / 2) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:104
CUDA Runtime error cudaMallocHost(&host_memory, except_size) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:313
Failed to read 8294400 byte data
CUDA Runtime error cudaFreeHost(host_memory) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:318
CUDA Runtime error cudaMallocHost(&host_, size * sizeof(T)) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:69
CUDA Runtime error cudaMalloc(&device_, size * sizeof(T)) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:70
Segmentation fault
Rerunning the program on a new server did not encounter the same issue again. It seems like this error is due to an incorrect configuration of the system environment.
Thanks for your effort.
@hopef Hello! I encountered the previous issue (cuOSD Error: cudaErrorMisalignedAddress) again after changing to a new server.
Originally, I was using a server with four T4 GPUs (CentOS7, Driver Version: 515.105.01, CUDA Version: 11.7), and the program crashed. After trying unsuccessfully to fix it, I tried to reproduce it on a cloud server (ubuntu20.04, Driver Version: 515.105.01, CUDA Version: 11.8), and found that the program ran normally and stably. So I naturally assumed that this problem was caused by a configuration error of the server.
However, after changing to another server with four T4 GPUs (CentOS7, Driver Version: 510.108.03, CUDA Version: 11.6), I encountered this problem again.
Could you please help me fix it? I would be very grateful.
The real cause of the error is that I frequently create and destroy CUDA streams within loops. This issue is not closely related to the deployment environment. My code has been modified and can now run stably.
New Code:
static int simple_test() {
int i;
cudaStream_t stream = nullptr;
checkRuntime(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
auto context = cuosd_context_create();
for (i = 0; i < 10000; ++i){
printf("Working......%d\n", i);
gpu::Image* image = gpu::create_image(3840, 2160, gpu::ImageFormat::PitchLinearNV12);
gpu::set_color(image, 255, 255, 255, 255, stream);
gpu::copy_yuvnv12_to(image, 0, 0, 3840, 2160, "data/image/nv12_3840x2160.yuv", 3840, 2160, 180, stream);
cuosd_draw_text(context,
"Shakespearean quotes:\n"
"\n"
"Words cannot express true love, loyalty behavior is the best explanation.\n"
, 13, "data/simhei.ttf", 10, 10, cuOSDColor{0, 255, 0, 255}, cuOSDColor{60, 60, 60, 200});
cuosd_apply(context, image, stream);
gpu::free_image(image);
}
cuosd_context_destroy(context);
checkRuntime(cudaStreamDestroy(stream));
return 0;
}
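Actually, cuosd_apply is an asynchronous function: it only enqueues work on the stream and returns before that work has finished. It is therefore incorrect to call cuosd_context_destroy to free the context immediately after it; synchronize the stream first, as shown below.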
// Corrected version of the per-iteration stress test.
// Each of the 10000 iterations creates a non-blocking stream, a cuOSD context
// and a 3840x2160 NV12 image, queues one text draw, applies it on the stream,
// and synchronizes the stream before tearing anything down.
// Always returns 0.
static int simple_test() {
    for (int i = 0; i < 10000; ++i) {
        cudaStream_t stream = nullptr;
        checkRuntime(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
        printf("Working......%d\n", i);
        auto context = cuosd_context_create();
        gpu::Image* image = gpu::create_image(3840, 2160, gpu::ImageFormat::PitchLinearNV12);
        gpu::set_color(image, 255, 255, 255, 255, stream);
        gpu::copy_yuvnv12_to(image, 0, 0, 3840, 2160, "data/image/nv12_3840x2160.yuv", 3840, 2160, 180, stream);
        cuosd_draw_text(context,
            "Shakespearean quotes:\n"
            "\n"
            "Words cannot express true love, loyalty behavior is the best explanation.\n"
            , 13, "data/simhei.ttf", 10, 10, cuOSDColor{0, 255, 0, 255}, cuOSDColor{60, 60, 60, 200});
        cuosd_apply(context, image, stream);

        // !!!!! You should add synchronization here: cuosd_apply is
        // asynchronous, so the context, stream and image must stay alive until
        // the queued work has completed. The call is wrapped in checkRuntime so
        // an asynchronous failure is reported here instead of being dropped.
        checkRuntime(cudaStreamSynchronize(stream));

        cuosd_context_destroy(context);
        checkRuntime(cudaStreamDestroy(stream));
        gpu::free_image(image);
    }
    return 0;
}