Closed
Description
I'm not sure which PR caused this issue, but after syncing up to the latest version, I can't compile with make LLAMA_CUBLAS=1
Here is the log:
I UNAME_S: Linux
I UNAME_P: x86_64
I UNAME_M: x86_64
I CFLAGS: -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion
I CXXFLAGS: -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi
I NVCCFLAGS:
I LDFLAGS:
I CC: cc (Ubuntu 9.4.0-1ubuntu1~18.04) 9.4.0
I CXX: g++ (Ubuntu 9.4.0-1ubuntu1~18.04) 9.4.0
rm -vrf *.o tests/*.o *.so *.dll benchmark-matmult common/build-info.cpp *.dot *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup tests/test-c.o metal tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope tests/test-backend-ops
removed 'build-info.o'
removed 'common.o'
removed 'console.o'
removed 'ggml-alloc.o'
removed 'ggml-backend.o'
removed 'ggml-cuda.o'
removed 'ggml-quants.o'
removed 'ggml.o'
removed 'grammar-parser.o'
removed 'llama.o'
removed 'sampling.o'
removed 'common/build-info.cpp'
I llama.cpp build info:
I UNAME_S: Linux
I UNAME_P: x86_64
I UNAME_M: x86_64
I CFLAGS: -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion
I CXXFLAGS: -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi
I NVCCFLAGS: -use_fast_math --forward-unknown-to-host-compiler -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
I LDFLAGS: -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L/home/ubuntu/anaconda3/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
I CC: cc (Ubuntu 9.4.0-1ubuntu1~18.04) 9.4.0
I CXX: g++ (Ubuntu 9.4.0-1ubuntu1~18.04) 9.4.0
cc -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion -c ggml.c -o ggml.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -c llama.cpp -o llama.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -c common/common.cpp -o common.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -c common/sampling.cpp -o sampling.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -c common/grammar-parser.cpp -o grammar-parser.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -c common/build-info.cpp -o build-info.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -c common/console.cpp -o console.o
nvcc -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -use_fast_math --forward-unknown-to-host-compiler -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -Wno-pedantic -Xcompiler "-Wno-array-bounds -Wno-format-truncation -Wextra-semi" -c ggml-cuda.cu -o ggml-cuda.o
cc -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion -c ggml-alloc.c -o ggml-alloc.o
cc -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion -c ggml-backend.c -o ggml-backend.o
cc -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion -c ggml-quants.c -o ggml-quants.o
g++ -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/home/ubuntu/anaconda3/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi examples/main/main.cpp ggml.o llama.o common.o sampling.o grammar-parser.o build-info.o console.o ggml-cuda.o ggml-alloc.o ggml-backend.o ggml-quants.o -o main -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L/home/ubuntu/anaconda3/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
/usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status
Makefile:588: recipe for target 'main' failed
make: *** [main] Error 1```