microsoft / BitNet

Official inference framework for 1-bit LLMs

Repository from GitHub: https://github.com/microsoft/BitNet

On Android device, compilation fails.

minne100 opened this issue · comments

I installed the Debian system on my Android phone using Termux, and then followed the instructions step by step until I ran
“python3 setup_env.py -md models/Falcon3-7B-Instruct-1.58bit -q i2_s”
the following error occurred.

[ 1%] Building C object 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
cc1: warning: command-line option ‘-fpermissive’ is valid for C++/ObjC++ but not for C
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12514:6: warning: no previous prototype for ‘float_act_quant’ [-Wmissing-prototypes]
12514 | void float_act_quant(const int K, float* B, int32_t* dst, float* act_scale) {
| ^~~~~~~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12530:6: warning: no previous prototype for ‘weight_quant_f32’ [-Wmissing-prototypes]
12530 | void weight_quant_f32(const int M, const int K, float* A, int32_t* dst, float* i2_scale) {
| ^~~~~~~~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c: In function ‘weight_quant_f32’:
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12545:35: warning: implicit conversion from ‘float’ to ‘double’ to match other operand of binary expression [-Wdouble-promotion]
12545 | dst[i] = (double)A[i] * i2_scale[0] > 0 ? 1 : -1;
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c: At top level:
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12550:6: warning: no previous prototype for ‘weight_quant_f16’ [-Wmissing-prototypes]
12550 | void weight_quant_f16(const int M, const int K, uint16_t* A, int32_t* dst, float* i2_scale) {
| ^~~~~~~~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c: In function ‘weight_quant_f16’:
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12566:37: warning: implicit conversion from ‘float’ to ‘double’ to match other operand of binary expression [-Wdouble-promotion]
12566 | dst[i] = (double)temp_A * i2_scale[0] > 0 ? 1 : -1;
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c: At top level:
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12571:6: warning: no previous prototype for ‘matrixMultiply_int’ [-Wmissing-prototypes]
12571 | void matrixMultiply_int(const int M, const int N, const int K, const int32_t* A, const int32_t* B, int32_t* C) {
| ^~~~~~~~~~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c: In function ‘ggml_compute_forward_mul_mat’:
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12660:44: warning: initialization of ‘bitnet_float_type ’ {aka ‘float ’} from incompatible pointer type ‘char ’ [-Wincompatible-pointer-types]
12660 | bitnet_float_type * bitnet_f_ptr = wdata;
| ^~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12664:25: warning: pointer targets in initialization of ‘int8_t ’ {aka ‘signed char ’} from ‘char ’ differ in signedness [-Wpointer-sign]
12664 | int8_t * qlut = cur_wdata;
| ^~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12675:42: warning: passing argument 1 of ‘ggml_bitnet_transform_tensor’ discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
12675 | ggml_bitnet_transform_tensor(src0);
| ^~~~
In file included from /root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:50:
/root/BitNet/3rdparty/llama.cpp/ggml/src/../../../../include/ggml-bitnet.h:35:65: note: expected ‘struct ggml_tensor ’ but argument is of type ‘const struct ggml_tensor
35 | GGML_API void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor);
| ~~~~~~~~~~~~~~~~~~~~~^~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12679:51: warning: passing argument 2 of ‘ggml_fp32_to_fp16_row’ from incompatible pointer type [-Wincompatible-pointer-types]
12679 | ggml_fp32_to_fp16_row(src1->data, bitnet_f_ptr, ne10 * ne11);
| ^~~~~~~~~~~~
| |
| bitnet_float_type * {aka float }
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:541:59: note: expected ‘ggml_fp16_t ’ {aka ‘short unsigned int ’} but argument is of type ‘bitnet_float_type ’ {aka ‘float ’}
541 | void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
| ~~~~~~~~~~~~~~^
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12722:50: warning: passing argument 1 of ‘ggml_fp16_to_fp32_row’ from incompatible pointer type [-Wincompatible-pointer-types]
12722 | ggml_fp16_to_fp32_row(act_output + dst_offset, (float ) dst->data + dst_offset, ne01 / n_tile_num);
| ~~~~~~~~~~~^~~~~~~~~~~~
| |
| bitnet_float_type * {aka float }
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:535:48: note: expected ‘const ggml_fp16_t ’ {aka ‘const short unsigned int ’} but argument is of type ‘bitnet_float_type ’ {aka ‘float ’}
535 | void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
| ~~~~~~~~~~~~~~~~~~~~^
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12712:23: warning: unused variable ‘qlut_offset’ [-Wunused-variable]
12712 | const int qlut_offset = 0;
| ^~~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12702:19: warning: unused variable ‘lut_tile_size’ [-Wunused-variable]
12702 | const int lut_tile_size = lut_size / n_tile_num;
| ^~~~~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12666:29: warning: unused variable ‘lut_biases’ [-Wunused-variable]
12666 | bitnet_float_type * lut_biases = (bitnet_float_type ) (lut_scales + wt->lut_scales_size * ne11);
| ^~~~~~~~~~
/root/BitNet/3rdparty/llama.cpp/ggml/src/ggml.c:12653:19: warning: unused variable ‘bits’ [-Wunused-variable]
12653 | const int bits = ggml_bitnet_get_type_bits(type);
| ^~~~
[ 2%] Building C object 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
cc1: warning: command-line option ‘-fpermissive’ is valid for C++/ObjC++ but not for C
[ 3%] Building CXX object 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.cpp.o
[ 4%] Building C object 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-quants.c.o
cc1: warning: command-line option ‘-fpermissive’ is valid for C++/ObjC++ but not for C
[ 5%] Building CXX object 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/////src/ggml-bitnet-mad.cpp.o
In file included from /root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-quants.h:4,
from /root/BitNet/src/ggml-bitnet-mad.cpp:5:
/root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-common.h:154:16: warning: ISO C++ prohibits anonymous structs [-Wpedantic]
154 | struct {
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-common.h:175:16: warning: ISO C++ prohibits anonymous structs [-Wpedantic]
175 | struct {
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-common.h:196:16: warning: ISO C++ prohibits anonymous structs [-Wpedantic]
196 | struct {
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-common.h:261:16: warning: ISO C++ prohibits anonymous structs [-Wpedantic]
261 | struct {
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-common.h:294:16: warning: ISO C++ prohibits anonymous structs [-Wpedantic]
294 | struct {
| ^
/root/BitNet/3rdparty/llama.cpp/ggml/src/./ggml-common.h:311:16: warning: ISO C++ prohibits anonymous structs [-Wpedantic]
311 | struct {
| ^
/root/BitNet/src/ggml-bitnet-mad.cpp: In function ‘size_t quantize_i2_s(const float
, void
, int64_t, int64_t, const float
)’:
/root/BitNet/src/ggml-bitnet-mad.cpp:46:100: warning: unused parameter ‘quant_weights’ [-Wunused-parameter]
46 | size_t quantize_i2_s(const float * src, void * dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| ~~~~~~~~~~~~~~^~~~~~~~~~~~~
/root/BitNet/src/ggml-bitnet-mad.cpp: In function ‘void ggml_vec_dot_i2_i8_s(int, float
, size_t, const void
, size_t, const void
, size_t, int)’:
/root/BitNet/src/ggml-bitnet-mad.cpp:95:28: warning: cast from type ‘const void
’ to type ‘uint8_t
’ {aka ‘unsigned char
’} casts away qualifiers [-Wcast-qual]
95 | const uint8_t * x = (uint8_t )vx;
| ^~~~~~~~~~~~~
/root/BitNet/src/ggml-bitnet-mad.cpp:96:28: warning: cast from type ‘const void
’ to type ‘int8_t
’ {aka ‘signed char
’} casts away qualifiers [-Wcast-qual]
96 | const int8_t * y = (int8_t )vy;
| ^~~~~~~~~~~~
/root/BitNet/src/ggml-bitnet-mad.cpp:94:52: warning: unused parameter ‘bs’ [-Wunused-parameter]
94 | void ggml_vec_dot_i2_i8_s(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
| ~~~~~~~^~
/root/BitNet/src/ggml-bitnet-mad.cpp:94:80: warning: unused parameter ‘bx’ [-Wunused-parameter]
94 | void ggml_vec_dot_i2_i8_s(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
| ~~~~~~~^~
/root/BitNet/src/ggml-bitnet-mad.cpp:94:108: warning: unused parameter ‘by’ [-Wunused-parameter]
94 | void ggml_vec_dot_i2_i8_s(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
| ~~~~~~~^~
/root/BitNet/src/ggml-bitnet-mad.cpp:94:116: warning: unused parameter ‘nrc’ [-Wunused-parameter]
94 | void ggml_vec_dot_i2_i8_s(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
| ~~~~^~~
during GIMPLE pass: vect
/root/BitNet/src/ggml-bitnet-mad.cpp: In function ‘size_t quantize_i2_s(const float
, void
, int64_t, int64_t, const float
)’:
/root/BitNet/src/ggml-bitnet-mad.cpp:46:8: internal compiler error: in vect_transform_reduction, at tree-vect-loop.cc:7457
46 | size_t quantize_i2_s(const float * src, void * dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| ^~~~~~~~~~~~~
0x18e9327 internal_error(char const
, ...)
???:0
0x6a6c8f fancy_abort(char const
, int, char const
)
???:0
0xf6c43b vect_transform_reduction(_loop_vec_info
, _stmt_vec_info
, gimple_stmt_iterator
, gimple
*, _slp_tree*)
???:0
0x18aedcf vect_transform_stmt(vec_info*, _stmt_vec_info*, gimple_stmt_iterator*, _slp_tree*, _slp_instance*)
???:0
0xf79bcf vect_transform_loop(_loop_vec_info*, gimple*)
???:0
Please submit a full bug report, with preprocessed source (by using -freport-bug).
Please include the complete backtrace with any bug report.
See file:///usr/share/doc/gcc-12/README.Bugs for instructions.
gmake[2]: *** [3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/build.make:132: 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/////src/ggml-bitnet-mad.cpp.o] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:759: 3rdparty/llama.cpp/ggml/src/CMakeFiles/ggml.dir/all] Error 2
gmake: *** [Makefile:136: all] Error 2

Can anyone help me?

It seems like you're using gcc instead of clang; try specifying clang explicitly. I solved it using this command:

cmake -DBITNET_ARM_TL1=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -B build