From 1c1afe09a5f738ad12ffc73ea983e93d69c070d7 Mon Sep 17 00:00:00 2001 From: l1cacheDell Date: Mon, 12 May 2025 05:49:14 +0000 Subject: [PATCH 1/2] boost compiling --- csrc/setup_cuda.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/csrc/setup_cuda.py b/csrc/setup_cuda.py index d5b197c8ce38..cfd5b2489448 100644 --- a/csrc/setup_cuda.py +++ b/csrc/setup_cuda.py @@ -138,6 +138,7 @@ def get_gencode_flags(): nvcc_compile_args = gencode_flags update_git_submodule() +os.environ.pop('PADDLE_CUDA_ARCH_LIST', None) nvcc_compile_args += [ "-O3", "-DNDEBUG", @@ -192,11 +193,8 @@ def get_gencode_flags(): ] if cc >= 80 and nvcc_version >= Version("12.4"): - os.environ.pop('PADDLE_CUDA_ARCH_LIST', None) nvcc_compile_args += [ - "-std=c++17", "--use_fast_math", - "--threads=8", "-D_GLIBCXX_USE_CXX11_ABI=1", ] sources += ["./gpu/sage_attn_kernels/sageattn_fused.cu"] @@ -235,7 +233,7 @@ def get_gencode_flags(): ext_modules=CUDAExtension( sources=sources, extra_compile_args={ - "cxx": ["-O3", "-fopenmp", "-lgomp", "-std=c++17", "-DENABLE_BF16"], + "cxx": ["-O3", "-fopenmp", "-lgomp", "-std=c++17", "-DENABLE_BF16", "--threads=8"], "nvcc": nvcc_compile_args, }, libraries=["cublasLt"], From bbe6fe6333aaa1e3cee8ed971e00b9798a652801 Mon Sep 17 00:00:00 2001 From: l1cacheDell Date: Tue, 13 May 2025 07:58:44 +0000 Subject: [PATCH 2/2] move pop func into sageattn --- csrc/setup_cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/setup_cuda.py b/csrc/setup_cuda.py index cfd5b2489448..13a27b15e0f2 100644 --- a/csrc/setup_cuda.py +++ b/csrc/setup_cuda.py @@ -138,7 +138,6 @@ def get_gencode_flags(): nvcc_compile_args = gencode_flags update_git_submodule() -os.environ.pop('PADDLE_CUDA_ARCH_LIST', None) nvcc_compile_args += [ "-O3", "-DNDEBUG", @@ -193,6 +192,7 @@ def get_gencode_flags(): ] if cc >= 80 and nvcc_version >= Version("12.4"): + os.environ.pop('PADDLE_CUDA_ARCH_LIST', None) nvcc_compile_args += [ "--use_fast_math", "-D_GLIBCXX_USE_CXX11_ABI=1",