Define CXXFLAGS in one continued ":=" assignment instead of ":=" followed by
two "+=" lines, and end it with $(CXXFLAGS) so that whatever value the variable
already holds when common.mk is read is appended after the built-in flags.

diff --git a/makefiles/common.mk b/makefiles/common.mk
index 37e81be..2e44826 100644
--- a/makefiles/common.mk
+++ b/makefiles/common.mk
@@ -42,9 +42,10 @@ else
 endif
 #$(info NVCC_GENCODE is ${NVCC_GENCODE})
 
-CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisibility=hidden
-CXXFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -std=c++11 -Wvla
-CXXFLAGS += -I $(CUDA_INC)
+CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisibility=hidden \
+            -Wall -Wno-unused-function -Wno-sign-compare -std=c++11 -Wvla \
+            -I $(CUDA_INC) \
+            $(CXXFLAGS)
 # Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
 # 512 : 120, 640 : 96, 768 : 80, 1024 : 60
 # We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.