TiledTensor / TiledCUDA

TiledCUDA is a highly efficient kernel template library designed to elevate CUDA C’s level of abstraction for processing tiles.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Improve code quality by addressing compilation warnings

haruhi55 opened this issue · comments

Compiling the project produces several warnings, listed below, that are caused by poor implementations. These warnings should be addressed and fixed.

 warning #940-D: missing return statement at end of non-void function "tiledcuda::cell::copy::<unnamed>::get_warp_offset<kMode,Shared,WarpLayout>() [with kMode=tiledcuda::cell::copy::WarpReuse::ColReuseCont, Shared=tiledcuda::cell::SharedTile<cutlass::half_t, cute::Layout<cute::tuple<cute::C<32>, cute::C<32>>, cute::tuple<cute::_1, cute::_128>>>, WarpLayout=cute::Layout<cute::tuple<cute::_2, cute::_2>, cute::tuple<cute::C<2>, cute::_1>>]"
  }
  ^
          detected during:
            instantiation of "int tiledcuda::cell::copy::<unnamed>::get_warp_offset<kMode,Shared,WarpLayout>() [with kMode=tiledcuda::cell::copy::WarpReuse::ColReuseCont, Shared=tiledcuda::cell::SharedTile<cutlass::half_t, cute::Layout<cute::tuple<cute::C<32>, cute::C<32>>, cute::tuple<cute::_1, cute::_128>>>, WarpLayout=cute::Layout<cute::tuple<cute::_2, cute::_2>, cute::tuple<cute::C<2>, cute::_1>>]" at line 380
            instantiation of "void tiledcuda::cell::copy::SharedToRegLoader<Reg_, WarpLayout_, kMode, tiledcuda::cell::copy::CopyInst::LoadMat>::operator()(const Shared &, tiledcuda::cell::copy::SharedToRegLoader<Reg_, WarpLayout_, kMode, tiledcuda::cell::copy::CopyInst::LoadMat>::Reg &) [with Reg_=tiledcuda::cell::RegTile<cutlass::half_t, cute::Layout<cute::tuple<cute::_8, cute::C<2>>, cute::tuple<cute::C<2>, cute::_1>>>, WarpLayout_=cute::Layout<cute::tuple<cute::_2, cute::_2>, cute::tuple<cute::C<2>, cute::_1>>, kMode=tiledcuda::cell::copy::WarpReuse::ColReuseCont, Shared=tiledcuda::cell::SharedTile<cutlass::half_t, cute::Layout<cute::tuple<cute::C<32>, cute::C<32>>, cute::tuple<cute::_1, cute::_128>>>]" <unnamed>::test_wmma<Element,ElementAcc,LoadSharedA,LoadSharedB,StoreSharedC,TileIteratorA,RegA,LoadRegA,TileIteratorB,RegB,LoadRegB,SharedC,RegC,StoreRegC>(const Element *, const Element *, ElementAcc *) [with Element=cutlass::half_t, ElementAcc=float, LoadSharedA=tiledcuda::cell::traits::G2S2DCopyTraits<cutlass::half_t, 64, 128, 64, 128, 128, false, tiledcuda::cell::traits::TraitsBase<cutlass::half_t>>, LoadSharedB=tiledcuda::cell::traits::G2S2DCopyTraits<cutlass::half_t, 32, 128, 32, 128, 128, false, tiledcuda::cell::traits::TraitsBase<cutlass::half_t>>, StoreSharedC=tiledcuda::cell::traits::S2G2DCopyTraits<float, 64, 32, 64, 32, 128, false, tiledcuda::cell::traits::TraitsBase<float>>, TileIteratorA=tiledcuda::cell::SharedTileIterator<tiledcuda::cell::SharedTile<cutlass::half_t, cute::Layout<cute::tuple<cute::C<64>, cute::C<128>>, cute::tuple<cute::_128, cute::_1>>>, tiledcuda::cell::TileShape<64UL, 32UL>>, RegA=tiledcuda::cell::RegTile<cutlass::half_t, cute::Layout<cute::tuple<cute::_4, cute::_8>, cute::tuple<cute::_8, cute::_1>>>, LoadRegA=tiledcuda::cell::copy::SharedToRegLoader<tiledcuda::cell::RegTile<cutlass::half_t, cute::Layout<cute::tuple<cute::_4, cute::_8>, cute::tuple<cute::_8, cute::_1>>>, cute::Layout<cute::tuple<cute::_2, cute::_2>, 
cute::tuple<cute::C<2>, cute::_1>>, tiledcuda::cell::copy::WarpReuse::RowReuseCont, tiledcuda::cell::copy::CopyInst::LoadMat>, TileIteratorB=tiledcuda::cell::SharedTileIterator<tiledcuda::cell::SharedTile<cutlass::half_t, cute::Layout<cute::tuple<cute::C<128>, cute::C<32>>, cute::tuple<cute::_1, cute::_128>>>, tiledcuda::cell::TileShape<32UL, 32UL>>, RegB=tiledcuda::cell::RegTile<cutlass::half_t, cute::Layout<cute::tuple<cute::_8, cute::C<2>>, cute::tuple<cute::C<2>, cute::_1>>>, LoadRegB=tiledcuda::cell::copy::SharedToRegLoader<tiledcuda::cell::RegTile<cutlass::half_t, cute::Layout<cute::tuple<cute::_8, cute::C<2>>, cute::tuple<cute::C<2>, cute::_1>>>, cute::Layout<cute::tuple<cute::_2, cute::_2>, cute::tuple<cute::C<2>, cute::_1>>, tiledcuda::cell::copy::WarpReuse::ColReuseCont, tiledcuda::cell::copy::CopyInst::LoadMat>, SharedC=tiledcuda::cell::SharedTile<float, cute::Layout<cute::tuple<cute::C<64>, cute::C<32>>, cute::tuple<cute::_32, cute::_1>>>, RegC=tiledcuda::cell::RegTile<float, cute::Layout<cute::tuple<cute::C<4>, cute::C<4>>, cute::tuple<cute::C<4>, cute::_1>>>, StoreRegC=tiledcuda::cell::copy::RegToSharedStorer<tiledcuda::cell::RegTile<float, cute::Layout<cute::tuple<cute::C<4>, cute::C<4>>, cute::tuple<cute::C<4>, cute::_1>>>, cute::Layout<cute::tuple<cute::_2, cute::_2>, cute::tuple<cute::C<2>, cute::_1>>, tiledcuda::cell::tile_layout::RegLayout::WMMA_m16n16k16,