lievan committed on
Commit
c857edd
·
verified ·
1 Parent(s): 68cc04d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +42 -2
README.md CHANGED
@@ -28,7 +28,47 @@ Eurus-RM-7B is trained on a mixture of [UltraInteract](https://huggingface.co/da
28
 
29
  ## Usage
30
  ```python
31
- from transformers import PreTrainedModel, AutoModel, AutoTokenizer, AutoConfig, AutoModelForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def test(model_path):
34
  dataset = [ # cases in webgpt; we use the same template as Mistral-Instruct-v0.2
@@ -38,7 +78,7 @@ def test(model_path):
38
 
39
 
40
  tokenizer = AutoTokenizer.from_pretrained(model_path)
41
- model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
42
 
43
  for example in dataset:
44
  inputs = tokenizer(example["chosen"], return_tensors="pt")
 
28
 
29
  ## Usage
30
  ```python
31
+ from transformers import PreTrainedModel, MistralConfig, MistralModel
32
+ import torch.nn as nn
33
+ import torch
34
+ from typing import Optional, List
35
+
36
class EurusRewardModel(PreTrainedModel):
    """Reward model: a Mistral backbone plus a bias-free scalar regression head.

    The head maps every token's hidden state to one scalar; ``forward``
    returns, for each sequence, the scalar at its last attended position.
    """

    config_class = MistralConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = MistralModel(config)
        self.regression_head = nn.Linear(self.config.hidden_size, 1, bias=False)

    def forward(  # signature mirrors the usual causal-LM forward for drop-in use
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> torch.Tensor:
        """Score each sequence in the batch with a single scalar reward.

        Args:
            input_ids: Token ids, forwarded to the backbone.
            attention_mask: Mask forwarded to the backbone and used to locate
                the last attended token of each row. When ``None``, the final
                position of every row is scored instead.
            position_ids: Forwarded to the backbone.
            past_key_values: Forwarded to the backbone.
            inputs_embeds: Forwarded to the backbone.
            labels: Accepted for signature compatibility; not used.
            use_cache: Accepted for signature compatibility; not used.
            output_attentions: Accepted for signature compatibility; not used.
            output_hidden_states: Accepted for signature compatibility; not used.
            return_dict: Accepted for signature compatibility; not used.

        Returns:
            Tensor with one reward per sequence, shaped ``(batch, 1)``.
        """
        transformer_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
        )

        # First element of the backbone output holds the per-token hidden states.
        hidden_states = transformer_outputs[0]
        # One scalar per token; drop the trailing singleton dim -> (batch, seq).
        rewards = self.regression_head(hidden_states).squeeze(-1)

        if attention_mask is None:
            # No mask supplied: treat every position as attended and score the
            # final one instead of failing on ``None.cumsum``.
            ends = torch.full(
                (rewards.size(0), 1),
                rewards.size(1) - 1,
                dtype=torch.long,
                device=rewards.device,
            )
        else:
            # The running sum of the mask stops growing after the last attended
            # token; argmax returns the first index of that maximum, i.e. the
            # position of the last attended token in each row.
            ends = attention_mask.cumsum(dim=1).argmax(dim=1).view(-1, 1)

        # Pick the per-row reward at the selected position -> (batch, 1).
        rewards = torch.gather(rewards, 1, ends)

        return rewards
72
 
73
  def test(model_path):
74
  dataset = [ # cases in webgpt; we use the same template as Mistral-Instruct-v0.2
 
78
 
79
 
80
  tokenizer = AutoTokenizer.from_pretrained(model_path)
81
+ model = EurusRewardModel.from_pretrained(model_path)
82
 
83
  for example in dataset:
84
  inputs = tokenizer(example["chosen"], return_tensors="pt")