NVIDIA-AI-IOT / Lidar_AI_Solution

A project demonstrating Lidar-related AI solutions, including three GPU-accelerated Lidar/camera DL networks (PointPillars, CenterPoint, BEVFusion) and the related libraries (cuPCL, 3D SparseConvolution, YUV2RGB, cuOSD).

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

cuOSD Error: cudaErrorMisalignedAddress

NishinoLangley opened this issue · comments

Hi, thank you for your excellent work.

I have made some changes in libraries/cuOSD/test/main.cpp for testing frequent cuosd_draw_text operations. However, an error occurred during program execution.

Code:

// Reproduction case from the issue report: runs 10000 iterations, each of which
// creates a non-blocking CUDA stream, a cuOSD context and a 3840x2160 NV12
// image, queues one text draw, applies it on the stream, and then tears
// everything down again. Always returns 0.
//
// NOTE(review): this is the variant that fails with cudaErrorMisalignedAddress
// in the log below. Per the maintainer's reply further down in this thread,
// cuosd_apply is asynchronous and the stream is never synchronized before the
// context, stream and image are released here.
static int simple_test() {
    int i;
    for (i = 0; i < 10000; ++i){
        // A fresh stream is created (and destroyed) on every iteration.
        cudaStream_t stream = nullptr;
        checkRuntime(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));

        printf("Working......%d\n", i);

        // A fresh context and image are likewise allocated per iteration.
        auto context = cuosd_context_create();
        gpu::Image* image = gpu::create_image(3840, 2160, gpu::ImageFormat::PitchLinearNV12);
        gpu::set_color(image, 255, 255, 255, 255, stream);
        // Presumably fills the image from the NV12 file on disk -- the helper is
        // defined elsewhere (test/gpu_image.cu according to the error log).
        gpu::copy_yuvnv12_to(image, 0, 0, 3840, 2160, "data/image/nv12_3840x2160.yuv", 3840, 2160, 180, stream);
        
        cuosd_draw_text(context, 
            "Shakespearean quotes:\n"
            "\n"
            "Words cannot express true love, loyalty behavior is the best explanation.\n"
            , 13, "data/simhei.ttf", 10, 10, cuOSDColor{0, 255, 0, 255}, cuOSDColor{60, 60, 60, 200});
        cuosd_apply(context, image, stream);
        // NOTE(review): no cudaStreamSynchronize(stream) between cuosd_apply and
        // the destroy call below -- the maintainer's reply identifies this as
        // the incorrect step.
        cuosd_context_destroy(context);

        checkRuntime(cudaStreamDestroy(stream));
        gpu::free_image(image);
    }
    return 0;
}

/**
 * Entry point of the test program.
 *
 * Dispatches to one test routine according to the sub-command in argv[1]
 * ("perf", "simple", "segment", "segment2", "polyline", "comp", "simple2").
 * When no sub-command is given, or it is not recognised, the usage text is
 * printed via help().
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments; argv[1] selects the test to run.
 * @return The selected routine's return value, or 0 after printing the usage.
 */
int main(int argc, char **argv)
{
    // Without a sub-command there is nothing to dispatch on. Stop here so that
    // argv[1] (a null pointer when argc == 1) is never handed to strcmp.
    // Whether help() itself terminates the process is not visible from here,
    // so the return is kept explicit.
    if (argc < 2) {
        help();
        return 0;
    }

    const char* cmd = argv[1];
    if (strcmp(cmd, "perf") == 0) {
        return perf(argc, argv);
    } else if (strcmp(cmd, "simple") == 0) {
        return simple_draw();
    } else if (strcmp(cmd, "segment") == 0) {
        return segment();
    } else if (strcmp(cmd, "segment2") == 0) {
        return segment2();
    } else if (strcmp(cmd, "polyline") == 0) {
        return polyline();
    } else if (strcmp(cmd, "comp") == 0) {
        return comp(argc, argv);
    } else if (strcmp(cmd, "simple2") == 0) {
        return simple_test();
    } else {
        // Unknown sub-command: show the usage text.
        help();
    }
    return 0;
}

Except for the modifications mentioned above, no changes have been made.

Error:

Working......7790
Working......7791
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaFreeHost(host_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:78
CUDA Runtime error cudaFree(device_) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:79
CUDA Runtime error cudaStreamDestroy(stream) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/main.cpp:248
CUDA Runtime error cudaFree(image->data0) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:563
CUDA Runtime error cudaFree(image->data1) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:564
CUDA Runtime error cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/main.cpp:229
Working......7792
CUDA Runtime error cudaMalloc(&output->data0, output->stride * output->height) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:103
CUDA Runtime error cudaMalloc(&output->data1, output->stride * output->height / 2) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:104
CUDA Runtime error cudaMallocHost(&host_memory, except_size) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:313
Failed to read 8294400 byte data
CUDA Runtime error cudaFreeHost(host_memory) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file test/gpu_image.cu:318
CUDA Runtime error cudaMallocHost(&host_, size * sizeof(T)) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:69
CUDA Runtime error cudaMalloc(&device_, size * sizeof(T)) # misaligned address, code = cudaErrorMisalignedAddress [ 716 ] in file src/memory.hpp:70
Segmentation fault

When I reran the program on a new server, the issue did not occur again. It seems that this error was caused by an incorrect configuration of the system environment.

Thanks for your effort.

@hopef Hello! I encountered the previous issue (cuOSD Error: cudaErrorMisalignedAddress) again after changing to a new server.

Originally, I was using a server with four T4 GPUs (CentOS7, Driver Version: 515.105.01, CUDA Version: 11.7), and the program crashed. After trying unsuccessfully to fix it, I tried to reproduce it on a cloud server (ubuntu20.04, Driver Version: 515.105.01, CUDA Version: 11.8), and found that the program ran normally and stably. So I naturally assumed that this problem was caused by a configuration error of the server.

However, after changing to another server with four T4 GPUs (CentOS7, Driver Version: 510.108.03, CUDA Version: 11.6), I encountered this problem again.

Could you please help me fix it? I would be very grateful.

The real cause of the error is that I frequently create and destroy CUDA streams within loops. This issue is not closely related to the deployment environment. My code has been modified and can now run stably.

New Code:

static int simple_test() {
    int i;
    cudaStream_t stream = nullptr;
    checkRuntime(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
    auto context = cuosd_context_create();

    for (i = 0; i < 10000; ++i){
        printf("Working......%d\n", i);

        gpu::Image* image = gpu::create_image(3840, 2160, gpu::ImageFormat::PitchLinearNV12);
        gpu::set_color(image, 255, 255, 255, 255, stream);
        gpu::copy_yuvnv12_to(image, 0, 0, 3840, 2160, "data/image/nv12_3840x2160.yuv", 3840, 2160, 180, stream);
        
        cuosd_draw_text(context, 
            "Shakespearean quotes:\n"
            "\n"
            "Words cannot express true love, loyalty behavior is the best explanation.\n"
            , 13, "data/simhei.ttf", 10, 10, cuOSDColor{0, 255, 0, 255}, cuOSDColor{60, 60, 60, 200});
        cuosd_apply(context, image, stream);

        gpu::free_image(image);
    }
    cuosd_context_destroy(context);
    checkRuntime(cudaStreamDestroy(stream));
    return 0;
}

Actually, cuosd_apply is an asynchronous function: it only enqueues work on the stream. It is incorrect to call cuosd_context_destroy to free the context immediately after cuosd_apply; synchronize the stream first, as shown below.

// Corrected version of the per-iteration stress test: every iteration creates
// its own stream, cuOSD context and 3840x2160 NV12 image, draws one text
// block, and waits for the stream to finish before anything is released.
// Always returns 0.
static int simple_test() {
    for (int i = 0; i < 10000; ++i){
        cudaStream_t stream = nullptr;
        checkRuntime(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));

        printf("Working......%d\n", i);

        auto context = cuosd_context_create();
        gpu::Image* image = gpu::create_image(3840, 2160, gpu::ImageFormat::PitchLinearNV12);
        gpu::set_color(image, 255, 255, 255, 255, stream);
        gpu::copy_yuvnv12_to(image, 0, 0, 3840, 2160, "data/image/nv12_3840x2160.yuv", 3840, 2160, 180, stream);

        cuosd_draw_text(context,
            "Shakespearean quotes:\n"
            "\n"
            "Words cannot express true love, loyalty behavior is the best explanation.\n"
            , 13, "data/simhei.ttf", 10, 10, cuOSDColor{0, 255, 0, 255}, cuOSDColor{60, 60, 60, 200});
        cuosd_apply(context, image, stream);

        //!!!!! You should add synchronization here.
        // cuosd_apply is asynchronous, so the work queued on `stream` must
        // complete before the context is destroyed. The call is wrapped in
        // checkRuntime like the other runtime calls so that an asynchronous
        // failure is reported at this point rather than by a later call.
        checkRuntime(cudaStreamSynchronize(stream));
        cuosd_context_destroy(context);

        checkRuntime(cudaStreamDestroy(stream));
        gpu::free_image(image);
    }
    return 0;
}