Update skyreels_v2_infer/modules/attention.py
skyreels_v2_infer/modules/attention.py
CHANGED
@@ -104,7 +104,7 @@ def flash_attention(
             deterministic=deterministic,
         )[0].unflatten(0, (b, lq))
     else:
-
+        assert FLASH_ATTN_2_AVAILABLE
         x = flash_attn.flash_attn_varlen_func(
             q=q,
             k=k,
@@ -144,6 +144,23 @@ def attention(
     dtype=torch.bfloat16,
     fa_version=None,
 ):
+    if FLASH_ATTN_2_AVAILABLE or FLASH_ATTN_3_AVAILABLE:
+        return flash_attention(
+            q=q,
+            k=k,
+            v=v,
+            q_lens=q_lens,
+            k_lens=k_lens,
+            dropout_p=dropout_p,
+            softmax_scale=softmax_scale,
+            q_scale=q_scale,
+            causal=causal,
+            window_size=window_size,
+            deterministic=deterministic,
+            dtype=dtype,
+            version=fa_version,
+        )
+    else:
         if q_lens is not None or k_lens is not None:
             warnings.warn(
                 "Padding mask is disabled when using scaled_dot_product_attention. It can have a significant impact on performance."
@@ -159,4 +176,4 @@ def attention(
             )

         out = out.transpose(1, 2).contiguous()
-    return out
+        return out
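
For context: the restored dispatch, and the `assert FLASH_ATTN_2_AVAILABLE` before the `flash_attn.flash_attn_varlen_func` call (which fails loudly rather than raising a NameError at call time), both depend on the module-level flags FLASH_ATTN_2_AVAILABLE and FLASH_ATTN_3_AVAILABLE. A minimal sketch of how such flags are typically set at import time; the actual detection in attention.py is not shown in this diff and may differ:

# Sketch only: probe for the FlashAttention wheels at import time.
# The flag names match those used in the diff; the detection logic
# here is an assumption, not the file's confirmed contents.
try:
    import flash_attn_interface  # FlashAttention-3 interface, if installed
    FLASH_ATTN_3_AVAILABLE = True
except ModuleNotFoundError:
    FLASH_ATTN_3_AVAILABLE = False

try:
    import flash_attn  # FlashAttention-2
    FLASH_ATTN_2_AVAILABLE = True
except ModuleNotFoundError:
    FLASH_ATTN_2_AVAILABLE = False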
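
And a hypothetical smoke test for the scaled_dot_product_attention fallback, runnable on a machine without flash-attn installed. The [batch, seq_len, num_heads, head_dim] layout is inferred from the unflatten(0, (b, lq)) and transpose(1, 2) calls visible in the diff; the shapes and dtype choice here are illustrative:

import torch
from skyreels_v2_infer.modules.attention import attention

# Dummy [batch, seq_len, num_heads, head_dim] tensors.
b, l, n, d = 1, 16, 4, 64
q = torch.randn(b, l, n, d)
k = torch.randn(b, l, n, d)
v = torch.randn(b, l, n, d)

# With neither FLASH_ATTN_2_AVAILABLE nor FLASH_ATTN_3_AVAILABLE set,
# this exercises the else branch: transpose to [b, n, l, d], run SDPA,
# transpose back, so the output shape matches the input layout.
out = attention(q, k, v, dtype=torch.float32)  # bfloat16 is the default
print(out.shape)  # torch.Size([1, 16, 4, 64])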