| [general] | |
| name = "flash_mla" | |
| [torch] | |
| src = ["torch-ext/torch_binding.cpp", "torch-ext/torch_binding.h"] | |
| [kernel.activation] | |
| cuda-capabilities = [ | |
| # "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", | |
| # Only available on H100 and H200 | |
| "9.0", # (Hopper) | |
| ] | |
| src = [ | |
| "flash_mla/flash_mla_api.cu", | |
| "flash_mla/flash_fwd_mla_bf16_sm90.cu", | |
| "flash_mla/flash_fwd_mla_fp16_sm90.cu", | |
| "flash_mla/flash_fwd_mla_kernel.h", | |
| "flash_mla/flash_fwd_mla_metadata.cu", | |
| "flash_mla/flash_mla.h", | |
| "flash_mla/named_barrier.h", | |
| "flash_mla/softmax.h", | |
| "flash_mla/static_switch.h", | |
| "flash_mla/utils.h", | |
| ] | |
| depends = ["torch", "cutlass_3_6"] | |