# layer_norm/build.toml
# Commit 5d4178a ("Add Builds") by medmekk (HF Staff)
# Project-wide settings.
[general]
# Project name; it matches the `layer_norm` kernel table and source directory
# used elsewhere in this file.
name = "layer_norm"
# NOTE(review): presumably `false` marks this as a compiled (non pure-Python)
# build — confirm against the build tool's documentation.
universal = false
# Sources of the Torch extension layer (binding implementation and its header).
[torch]
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]
# Build settings for the `layer_norm` kernel: a CUDA backend that depends on
# torch.
[kernel.layer_norm]
depends = ["torch"]
backend = "cuda"
# NOTE(review): presumably the GPU compute capabilities to compile for —
# confirm against the build tool's documentation.
cuda-capabilities = ["8.0", "8.9", "9.0", "10.0", "12.0"]
# NOTE(review): "." is presumably resolved relative to the project root —
# confirm.
include = ["."]
# Kernel sources and headers. The file names suggest one forward and one
# backward translation unit per supported size (256 through 8192), each with a
# "parallel" variant — confirm against the sources themselves.
src = [
    "layer_norm/ln.h",
    "layer_norm/ln_api.cpp",
    "layer_norm/ln_bwd_1024.cu",
    "layer_norm/ln_bwd_1280.cu",
    "layer_norm/ln_bwd_1536.cu",
    "layer_norm/ln_bwd_2048.cu",
    "layer_norm/ln_bwd_256.cu",
    "layer_norm/ln_bwd_2560.cu",
    "layer_norm/ln_bwd_3072.cu",
    "layer_norm/ln_bwd_4096.cu",
    "layer_norm/ln_bwd_512.cu",
    "layer_norm/ln_bwd_5120.cu",
    "layer_norm/ln_bwd_6144.cu",
    "layer_norm/ln_bwd_7168.cu",
    "layer_norm/ln_bwd_768.cu",
    "layer_norm/ln_bwd_8192.cu",
    "layer_norm/ln_bwd_kernels.cuh",
    "layer_norm/ln_fwd_1024.cu",
    "layer_norm/ln_fwd_1280.cu",
    "layer_norm/ln_fwd_1536.cu",
    "layer_norm/ln_fwd_2048.cu",
    "layer_norm/ln_fwd_256.cu",
    "layer_norm/ln_fwd_2560.cu",
    "layer_norm/ln_fwd_3072.cu",
    "layer_norm/ln_fwd_4096.cu",
    "layer_norm/ln_fwd_512.cu",
    "layer_norm/ln_fwd_5120.cu",
    "layer_norm/ln_fwd_6144.cu",
    "layer_norm/ln_fwd_7168.cu",
    "layer_norm/ln_fwd_768.cu",
    "layer_norm/ln_fwd_8192.cu",
    "layer_norm/ln_fwd_kernels.cuh",
    "layer_norm/ln_kernel_traits.h",
    "layer_norm/ln_parallel_bwd_1024.cu",
    "layer_norm/ln_parallel_bwd_1280.cu",
    "layer_norm/ln_parallel_bwd_1536.cu",
    "layer_norm/ln_parallel_bwd_2048.cu",
    "layer_norm/ln_parallel_bwd_256.cu",
    "layer_norm/ln_parallel_bwd_2560.cu",
    "layer_norm/ln_parallel_bwd_3072.cu",
    "layer_norm/ln_parallel_bwd_4096.cu",
    "layer_norm/ln_parallel_bwd_512.cu",
    "layer_norm/ln_parallel_bwd_5120.cu",
    "layer_norm/ln_parallel_bwd_6144.cu",
    "layer_norm/ln_parallel_bwd_7168.cu",
    "layer_norm/ln_parallel_bwd_768.cu",
    "layer_norm/ln_parallel_bwd_8192.cu",
    "layer_norm/ln_parallel_fwd_1024.cu",
    "layer_norm/ln_parallel_fwd_1280.cu",
    "layer_norm/ln_parallel_fwd_1536.cu",
    "layer_norm/ln_parallel_fwd_2048.cu",
    "layer_norm/ln_parallel_fwd_256.cu",
    "layer_norm/ln_parallel_fwd_2560.cu",
    "layer_norm/ln_parallel_fwd_3072.cu",
    "layer_norm/ln_parallel_fwd_4096.cu",
    "layer_norm/ln_parallel_fwd_512.cu",
    "layer_norm/ln_parallel_fwd_5120.cu",
    "layer_norm/ln_parallel_fwd_6144.cu",
    "layer_norm/ln_parallel_fwd_7168.cu",
    "layer_norm/ln_parallel_fwd_768.cu",
    "layer_norm/ln_parallel_fwd_8192.cu",
    "layer_norm/ln_parallel_residual_bwd_kernels.cuh",
    "layer_norm/ln_parallel_residual_fwd_kernels.cuh",
    "layer_norm/ln_utils.cuh",
    "layer_norm/static_switch.h",
]
# NOTE(review): presumably passed to the host C++ compiler — confirm.
cxx-flags = ["-DFLASHATTENTION_DISABLE_PYBIND", "-mcmodel=large"]
# NOTE(review): presumably passed to the CUDA compiler — confirm. The -U
# entries undefine the __CUDA_NO_* half/bfloat16 operator and conversion
# macros.
cuda-flags = [
    "-O3",
    "-U__CUDA_NO_HALF_OPERATORS__",
    "-U__CUDA_NO_HALF_CONVERSIONS__",
    "-U__CUDA_NO_BFLOAT16_OPERATORS__",
    "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
    "-U__CUDA_NO_BFLOAT162_OPERATORS__",
    "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
    "--expt-relaxed-constexpr",
    "--expt-extended-lambda",
    "--use_fast_math",
]