Update modeling_kangaroo.py
Browse files- modeling_kangaroo.py +3 -3
modeling_kangaroo.py
CHANGED
@@ -1020,7 +1020,7 @@ class LlamaModel(LlamaPreTrainedModel):
|
|
1020 |
min_dtype = torch.finfo(dtype).min
|
1021 |
sequence_length = input_tensor.shape[1]
|
1022 |
if using_static_cache:
|
1023 |
-
target_length = past_key_values.get_max_length()
|
1024 |
else:
|
1025 |
target_length = (
|
1026 |
attention_mask.shape[-1]
|
@@ -1308,8 +1308,8 @@ class KangarooForCausalLM(LlamaPreTrainedModel):
|
|
1308 |
if isinstance(past_key_values, Cache):
|
1309 |
past_length = cache_position[0] if cache_position is not None else past_key_values.get_seq_length()
|
1310 |
max_cache_length = (
|
1311 |
-
torch.tensor(past_key_values.get_max_length(), device=input_ids.device)
|
1312 |
-
if past_key_values.get_max_length() is not None
|
1313 |
else None
|
1314 |
)
|
1315 |
cache_length = past_length if max_cache_length is None else torch.min(max_cache_length, past_length)
|
|
|
1020 |
min_dtype = torch.finfo(dtype).min
|
1021 |
sequence_length = input_tensor.shape[1]
|
1022 |
if using_static_cache:
|
1023 |
+
target_length = past_key_values.get_seq_length()
|
1024 |
else:
|
1025 |
target_length = (
|
1026 |
attention_mask.shape[-1]
|
|
|
1308 |
if isinstance(past_key_values, Cache):
|
1309 |
past_length = cache_position[0] if cache_position is not None else past_key_values.get_seq_length()
|
1310 |
max_cache_length = (
|
1311 |
+
torch.tensor(past_key_values.get_seq_length(), device=input_ids.device)
|
1312 |
+
if past_key_values.get_seq_length() is not None
|
1313 |
else None
|
1314 |
)
|
1315 |
cache_length = past_length if max_cache_length is None else torch.min(max_cache_length, past_length)
|