[Issue]: segfault when calling 'llvm::Target::createTargetMachine'
fwinter opened this issue · comments
Problem Description
With ROCm 5.6.0 and later I'm getting a segfault when calling 'llvm::Target::createTargetMachine'. Earlier versions work fine.
The system:
Device name : AMD Instinct MI250X
GCN architecture : gfx90a
My application uses the LLVM compiler that is bundled with the ROCm installation (/opt/rocm/llvm/). The call to 'createTargetMachine' looks like this:
llvm::TargetOptions options;
TargetMachine.reset(TheTarget->createTargetMachine(
"amdgcn-amd-amdhsa", // triple
"gfx90a", // CPU
"", // feature str
options,
llvm::Reloc::PIC_
));
This was never a problem. But since ROCm version 5.6.0 I get segfaults like:
Thread 1 "t_dslashm" received signal SIGSEGV, Segmentation fault.
0x00000000008a6b81 in llvm::RegisterTargetMachine<llvm::GCNTargetMachine>::Allocator(llvm::Target const&, llvm::Triple const&, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOpt::Level, bool) ()
Missing separate debuginfos, use: zypper install krb5-debuginfo-1.19.2-150400.3.6.1.x86_64 libbrotlicommon1-debuginfo-1.0.7-3.3.1.x86_64 libbrotlidec1-debuginfo-1.0.7-3.3.1.x86_64 libcom_err2-debuginfo-1.46.4-150400.3.3.1.x86_64 libcurl4-debuginfo-8.0.1-150400.5.41.1.x86_64 libelf1-debuginfo-0.185-150400.5.3.1.x86_64 libidn2-0-debuginfo-2.2.0-3.6.1.x86_64 libjitterentropy3-debuginfo-3.4.0-150000.1.9.1.x86_64 libjson-c3-debuginfo-0.13-3.3.1.x86_64 libkeyutils1-debuginfo-1.6.3-5.6.1.x86_64 libldap-2_4-2-debuginfo-2.4.46-150200.14.17.1.x86_64 liblzma5-debuginfo-5.2.3-150000.4.7.1.x86_64 libncurses6-debuginfo-6.1-150000.5.20.1.x86_64 libnghttp2-14-debuginfo-1.40.0-150200.12.1.x86_64 libnl3-200-debuginfo-3.3.0-1.29.x86_64 libnuma1-debuginfo-2.0.14.20.g4ee5e0c-150400.1.24.x86_64 libopenssl1_1-debuginfo-1.1.1l-150400.7.60.2.x86_64 libpcre1-debuginfo-8.45-150000.20.13.1.x86_64 libpsl5-debuginfo-0.20.1-150000.3.3.1.x86_64 libselinux1-debuginfo-3.1-150400.1.69.x86_64 libssh4-debuginfo-0.9.6-150400.1.5.x86_64 libunistring2-debuginfo-0.9.10-1.1.x86_64 libxml2-2-debuginfo-2.9.14-150400.5.25.1.x86_64 libyaml-0-2-debuginfo-0.1.7-1.17.x86_64 libz1-debuginfo-1.2.11-150000.3.48.1.x86_64
(gdb) bt
#0 0x00000000008a6b81 in llvm::RegisterTargetMachine<llvm::GCNTargetMachine>::Allocator(llvm::Target const&, llvm::Triple const&, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOpt::Level, bool) ()
#1 0x0000000000578e4b in llvm::Target::createTargetMachine (this=0x48a9800 <llvm::getTheGCNTarget()::TheGCNTarget>, TT=..., CPU=..., Features=..., Options=..., RM=..., CM=..., OL=llvm::CodeGenOpt::Default, JIT=false)
at /opt/rocm-5.6.0/llvm/include/llvm/MC/TargetRegistry.h:488
What am I missing?
Operating System
Linux frontier07423 5.14.21-150400.24.46_12.0.83-cray_shasta_c
CPU
AMD EPYC 7A53 64-Core Processor
GPU
AMD Instinct MI250X, AMD Instinct MI250
ROCm Version
ROCm 5.6.0
ROCm Component
llvm-project
Steps to Reproduce
No response
(Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support
No response
Additional Information
No response
An internal ticket has been created for investigation.
Here's the reproducer:
#include "llvm/InitializePasses.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <iostream>
/// Minimal reproducer for the reported crash.
///
/// Initializes all LLVM targets and a handful of passes, looks up the target
/// for the amdgcn-amd-amdhsa triple, and asks it for a gfx90a TargetMachine.
///
/// @return 0 after printing the target machine's CPU and triple; 1 if the
///         target lookup fails or no TargetMachine could be created.
int main()
{
    llvm::InitializeAllTargets();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmPrinters();
    llvm::InitializeAllAsmParsers();

    llvm::PassRegistry *Registry = llvm::PassRegistry::getPassRegistry();
    llvm::initializeCore(*Registry);
    llvm::initializeCodeGen(*Registry);
    llvm::initializeLoopStrengthReducePass(*Registry);
    llvm::initializeLowerIntrinsicsPass(*Registry);
    llvm::initializeUnreachableBlockElimLegacyPassPass(*Registry);
    llvm::initializeConstantHoistingLegacyPassPass(*Registry);

    llvm::Triple TheTriple;
    TheTriple.setArch(llvm::Triple::ArchType::amdgcn);
    TheTriple.setVendor(llvm::Triple::VendorType::AMD);
    TheTriple.setOS(llvm::Triple::OSType::AMDHSA);

    std::string Error;
    const llvm::Target *TheTarget =
        llvm::TargetRegistry::lookupTarget(TheTriple.str(), Error);
    if (!TheTarget) {
        // Diagnostics go to stderr; the newline keeps the two messages apart.
        std::cerr << Error << '\n';
        std::cerr << "Something went wrong setting the target\n";
        return 1;
    }

    llvm::TargetOptions Options;
    // The unique_ptr takes ownership of the raw pointer returned by the factory.
    std::unique_ptr<llvm::TargetMachine> TargetMachine(
        TheTarget->createTargetMachine(
            "amdgcn-amd-amdhsa", // triple
            "gfx90a",            // CPU
            "",                  // FeatureStr
            Options,
            llvm::Reloc::PIC_));

    // The factory can hand back a null pointer; bail out instead of
    // dereferencing it below.
    if (!TargetMachine) {
        std::cerr << "Could not create a target machine for "
                  << TheTriple.str() << "\n";
        return 1;
    }

    std::cout << "LLVM initialization" << std::endl;
    std::cout << " Target machine CPU : " << TargetMachine->getTargetCPU().str() << "\n";
    std::cout << " Target triple : " << TargetMachine->getTargetTriple().str() << "\n";
    return 0;
}
CMakeLists.txt
# Build configuration for the llvm_init reproducer executable.
cmake_minimum_required(VERSION 3.17)
project(SEGFAULT VERSION 0.1 LANGUAGES CXX;C)
# Locate LLVM through its CMake package configuration, requesting version 16.0.
find_package(LLVM "16.0" REQUIRED CONFIG)
# Report which LLVM version and which LLVMConfig.cmake directory were picked up.
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in ${LLVM_DIR}")
# Single-source executable built from llvm_init.cc with CXX_STANDARD 17.
add_executable(llvm_init llvm_init.cc)
set_target_properties( llvm_init PROPERTIES CXX_STANDARD 17 )
target_include_directories(llvm_init PUBLIC ${LLVM_INCLUDE_DIRS} )
# Links against everything listed in LLVM_AVAILABLE_LIBS.
# NOTE(review): this may pull in more libraries than the reproducer needs - confirm.
target_link_libraries( llvm_init PUBLIC ${LLVM_AVAILABLE_LIBS} )
build.sh
# Point CMake at the compilers named CC (C++) and cc (C) through the
# environment, then configure the project located in the parent directory.
export CXX=CC
export CC=cc
cmake ../
Steps to reproduce on Frontier:
module load zstd
module load rocm/5.5.1
module load PrgEnv-gnu
mkdir build; cd build
../build.sh
-- The CXX compiler identification is GNU 12.2.0
-- The C compiler identification is GNU 12.2.0
-- Cray Programming Environment 2.7.19 CXX
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /opt/cray/pe/craype/2.7.19/bin/CC - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Cray Programming Environment 2.7.19 C
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /opt/cray/pe/craype/2.7.19/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Performing Test Terminfo_LINKABLE
-- Performing Test Terminfo_LINKABLE - Success
-- Found Terminfo: /usr/lib64/libtinfo.so
-- Found ZLIB: /usr/lib64/libz.so (found version "1.2.11")
-- Found LLVM 16.0.0git
-- Using LLVMConfig.cmake in /opt/rocm-5.5.1/llvm/lib/cmake/llvm
-- Configuring done
-- Generating done
-- Build files have been written to: /ccs/home/fwinter/llvm_segfault/build
$make
[ 50%] Building CXX object CMakeFiles/llvm_init.dir/llvm_init.cc.o
[100%] Linking CXX executable llvm_init
[100%] Built target llvm_init
./llvm_init
LLVM initialization
Target machine CPU : gfx90a
Target triple : amdgcn-amd-amdhsa
module load rocm/5.6.0
(clean directory, rebuild executable)
$ ../build.sh
-- The CXX compiler identification is GNU 12.2.0
-- The C compiler identification is GNU 12.2.0
-- Cray Programming Environment 2.7.19 CXX
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /opt/cray/pe/craype/2.7.19/bin/CC - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Cray Programming Environment 2.7.19 C
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /opt/cray/pe/craype/2.7.19/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Performing Test Terminfo_LINKABLE
-- Performing Test Terminfo_LINKABLE - Success
-- Found Terminfo: /usr/lib64/libtinfo.so
-- Found ZLIB: /usr/lib64/libz.so (found version "1.2.11")
-- Found zstd: /sw/frontier/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/zstd-1.5.2-g7yosuyngtahpkmypk25drrmoykqxsr4/lib/libzstd.so
-- Found LLVM 16.0.0git
-- Using LLVMConfig.cmake in /opt/rocm-5.6.0/llvm/lib/cmake/llvm
-- Configuring done
-- Generating done
-- Build files have been written to: /ccs/home/fwinter/llvm_segfault/build
$ make
[ 50%] Building CXX object CMakeFiles/llvm_init.dir/llvm_init.cc.o
[100%] Linking CXX executable llvm_init
[100%] Built target llvm_init
$ ./llvm_init
Segmentation fault (core dumped)
Program received signal SIGSEGV, Segmentation fault.
0x0000000001055b71 in llvm::RegisterTargetMachine<llvm::GCNTargetMachine>::Allocator(llvm::Target const&, llvm::Triple const&, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOpt::Level, bool) ()
Missing separate debuginfos, use: zypper install krb5-debuginfo-1.19.2-150400.3.6.1.x86_64 libbrotlicommon1-debuginfo-1.0.7-3.3.1.x86_64 libbrotlidec1-debuginfo-1.0.7-3.3.1.x86_64 libcom_err2-debuginfo-1.46.4-150400.3.3.1.x86_64 libcurl4-debuginfo-8.0.1-150400.5.41.1.x86_64 libidn2-0-debuginfo-2.2.0-3.6.1.x86_64 libjitterentropy3-debuginfo-3.4.0-150000.1.9.1.x86_64 libjson-c3-debuginfo-0.13-3.3.1.x86_64 libkeyutils1-debuginfo-1.6.3-5.6.1.x86_64 libldap-2_4-2-debuginfo-2.4.46-150200.14.17.1.x86_64 libncurses6-debuginfo-6.1-150000.5.20.1.x86_64 libnghttp2-14-debuginfo-1.40.0-150200.12.1.x86_64 libnl3-200-debuginfo-3.3.0-1.29.x86_64 libopenssl1_1-debuginfo-1.1.1l-150400.7.60.2.x86_64 libpcre1-debuginfo-8.45-150000.20.13.1.x86_64 libpsl5-debuginfo-0.20.1-150000.3.3.1.x86_64 libselinux1-debuginfo-3.1-150400.1.69.x86_64 libssh4-debuginfo-0.9.6-150400.1.5.x86_64 libunistring2-debuginfo-0.9.10-1.1.x86_64 libyaml-0-2-debuginfo-0.1.7-1.17.x86_64 libz1-debuginfo-1.2.11-150000.3.48.1.x86_64
(gdb) bt
#0 0x0000000001055b71 in llvm::RegisterTargetMachine<llvm::GCNTargetMachine>::Allocator(llvm::Target const&, llvm::Triple const&, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOpt::Level, bool) ()
#1 0x000000000049bc93 in llvm::Target::createTargetMachine(llvm::StringRef, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOpt::Level, bool) const ()
#2 0x000000000049b441 in main ()
Are there any updates? I have a similar problem with an x86_64 CPU too.