update readme for SDP
Browse files
README.md
CHANGED
|
@@ -300,6 +300,9 @@ weight_decay:
|
|
| 300 |
xformers_attention:
|
| 301 |
# whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
|
| 302 |
flash_attention: # require a100 for llama
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
# resume from a specific checkpoint dir
|
| 305 |
resume_from_checkpoint:
|
|
|
|
| 300 |
xformers_attention:
|
| 301 |
# whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
|
| 302 |
flash_attention: # require a100 for llama
|
| 303 |
+ # whether to use scaled-dot-product attention
|
| 304 |
+ # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
|
| 305 |
+ sdp_attention:
|
| 306 |
|
| 307 |
# resume from a specific checkpoint dir
|
| 308 |
resume_from_checkpoint:
|