Upload model
generation_utils.py  (+21 −6)  CHANGED
@@ -302,8 +302,9 @@ class DreamGenerationMixin:
         **kwargs,
     ) -> Union[DreamModelOutput, torch.LongTensor]:
         # 1. Handle `generation_config` and kwargs that might update it, and validate the `.generate()` call
-        tokenizer = kwargs.pop("tokenizer", None)  # Pull this out first, we only use it for stopping criteria
         generation_config = self._prepare_generation_config(generation_config, **kwargs)
+        generation_tokens_hook_func = kwargs.pop("generation_tokens_hook_func", lambda step, x, logits: x)
+        generation_logits_hook_func = kwargs.pop("generation_logits_hook_func", lambda step, x, logits: logits)
 
         # 2. Define model inputs
         assert inputs is not None
@@ -355,6 +356,8 @@
             input_ids,
             attention_mask=attention_mask,
             generation_config=generation_config,
+            generation_tokens_hook_func=generation_tokens_hook_func,
+            generation_logits_hook_func=generation_logits_hook_func
         )
         return result
 
@@ -363,6 +366,8 @@
         input_ids: torch.LongTensor,
         attention_mask: Optional[torch.LongTensor],
         generation_config: DreamGenerationConfig,
+        generation_tokens_hook_func,
+        generation_logits_hook_func
     ) -> Union[DreamModelOutput, torch.LongTensor]:
         # init values
         output_history = generation_config.output_history
@@ -398,11 +403,18 @@
             attention_mask = "full"
 
         timesteps = torch.linspace(1, eps, steps + 1, device=x.device)
+
+        # this allows user-defined token control of the intermediate steps
+        x = generation_tokens_hook_func(None, x, None)
         for i in range(steps):
             mask_index = (x == mask_token_id)
             logits = self(x, attention_mask, tok_idx).logits
             logits = torch.cat([logits[:,:1], logits[:, :-1]], dim=1)
-            mask_logits = logits[mask_index]
+
+            # this allows user-defined logits control of the intermediate steps
+            logits = generation_logits_hook_func(i, x, logits)
+
+            mask_logits = logits[mask_index]
             t = timesteps[i]
             s = timesteps[i + 1]
 
@@ -410,15 +422,15 @@
                 p_transfer = 1 - s / t if i < steps - 1 else 1
                 x0 = torch.zeros_like(x[mask_index], device=self.device, dtype=torch.long) + mask_token_id
                 transfer_index_t_s = torch.rand(*x0.shape, device=self.device) < p_transfer
-                _, x0[transfer_index_t_s]= sample_tokens(…)
+                _, x0[transfer_index_t_s]= sample_tokens(mask_logits[transfer_index_t_s], temperature=temperature, top_p=top_p, top_k=top_k)
                 x[mask_index] = x0.clone()
             else:
                 if alg == 'maskgit_plus':
-                    confidence, x0 = sample_tokens(…)
+                    confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
                 elif alg == 'topk_margin':
-                    confidence, x0 = sample_tokens(…)
+                    confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, margin_confidence=True)
                 elif alg == 'entropy':
-                    confidence, x0 = sample_tokens(…)
+                    confidence, x0 = sample_tokens(mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True)
                 else:
                     raise RuntimeError(f"Unknown alg: {alg}")
                 num_mask_token = mask_index.sum()
@@ -434,6 +446,9 @@
                 x0_[transfer_index] = x0[transfer_index].clone()
                 x[mask_index] = x0_
 
+            # this allows user-defined token control of the intermediate steps
+            x = generation_tokens_hook_func(i, x, logits)
+
             if histories is not None:
                 histories.append(x.clone())
 
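The two new kwargs give callers step-level control over decoding without subclassing the mixin: the tokens hook fires once before the loop (with step=None, logits=None) and again after every denoising step, while the logits hook fires on each step's shifted logits before sampling. Below is a minimal usage sketch, not part of this commit. It assumes the usual Dream loading recipe (AutoModel with trust_remote_code=True), the mixin's public entry point diffusion_generate forwarding extra kwargs into _sample as shown above, and return_dict_in_generate=True producing an output with .sequences; the model id, prompt, hook bodies, and banned token id are illustrative only.

import torch
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "Dream-org/Dream-v0-Instruct-7B"  # assumed id for this repo

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True, torch_dtype=torch.bfloat16)

BANNED_TOKEN_ID = 11  # placeholder id, for illustration only

def tokens_hook(step, x, logits):
    # Called once before the loop (step=None, logits=None) and after every
    # step; must return the (possibly edited) token tensor. Returning x
    # unchanged reproduces the default lambda from the diff above.
    return x

def logits_hook(step, x, logits):
    # Called on each step's logits before sampling; zeroing out an id's
    # probability here keeps it out of every intermediate unmasking decision.
    logits[..., BANNED_TOKEN_ID] = float("-inf")
    return logits

inputs = tokenizer("The capital of France is", return_tensors="pt")
out = model.diffusion_generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=32,
    steps=32,
    return_dict_in_generate=True,
    generation_tokens_hook_func=tokens_hook,
    generation_logits_hook_func=logits_hook,
)
print(tokenizer.decode(out.sequences[0], skip_special_tokens=True))

Because the tokens hook also runs before the first step, it can pre-fill positions in x (for example, pinning a template token); since mask_index is recomputed from x on every iteration, the loop then treats those positions as already unmasked and never overwrites them.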