dni138 committed
Commit edc9868 · verified · 1 Parent(s): 49ea8f4

Update README.md

Documents what we are attempting to do by adding this model to Mozilla AI and provides starter code for people to use the model.

Files changed (1)
  1. README.md +114 -3
README.md CHANGED
@@ -7,6 +7,117 @@ tags:
  ---
 
  This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
- - Code: [More Information Needed]
- - Paper: [More Information Needed]
- - Docs: [More Information Needed]
+
+ For full documentation of this model, please see the official [model card](https://huggingface.co/govtech/jina-embeddings-v2-small-en-off-topic) maintained by GovTech, who built the model.
+
+ Mozilla AI has made it possible to load `govtech/jina-embeddings-v2-small-en-off-topic` with `from_pretrained`. To do this, you'll need to first pull the `CrossEncoderWithSharedBase` model
+ architecture from their model card and make sure to add `PyTorchModelHubMixin` as an inherited class. See this [article](https://huggingface.co/docs/hub/en/models-uploading#upload-a-pytorch-model-using-huggingfacehub) for details.
+
+ Then, you can do the following:
+
+ ```python
+ import torch
+ import torch.nn as nn
+ from transformers import AutoModel, AutoTokenizer
+ from huggingface_hub import PyTorchModelHubMixin
+
+ class Adapter(nn.Module):
+     def __init__(self, hidden_size):
+         super(Adapter, self).__init__()
+         self.down_project = nn.Linear(hidden_size, hidden_size // 2)
+         self.activation = nn.ReLU()
+         self.up_project = nn.Linear(hidden_size // 2, hidden_size)
+
+     def forward(self, x):
+         down = self.down_project(x)
+         activated = self.activation(down)
+         up = self.up_project(activated)
+         return up + x  # Residual connection
+
+ class AttentionPooling(nn.Module):
+     def __init__(self, hidden_size):
+         super(AttentionPooling, self).__init__()
+         self.attention_weights = nn.Parameter(torch.randn(hidden_size))
+
+     def forward(self, hidden_states):
+         # hidden_states: [seq_len, batch_size, hidden_size]
+         scores = torch.matmul(hidden_states, self.attention_weights)
+         attention_weights = torch.softmax(scores, dim=0)
+         weighted_sum = torch.sum(attention_weights.unsqueeze(-1) * hidden_states, dim=0)
+         return weighted_sum
+
+ class CrossEncoderWithSharedBase(nn.Module, PyTorchModelHubMixin):
+     def __init__(self, base_model, num_labels=2, num_heads=8):
+         super(CrossEncoderWithSharedBase, self).__init__()
+         # Shared pre-trained encoder
+         self.shared_encoder = base_model
+         hidden_size = self.shared_encoder.config.hidden_size
+         # Sentence-specific adapters
+         self.adapter1 = Adapter(hidden_size)
+         self.adapter2 = Adapter(hidden_size)
+         # Cross-attention layers
+         self.cross_attention_1_to_2 = nn.MultiheadAttention(hidden_size, num_heads)
+         self.cross_attention_2_to_1 = nn.MultiheadAttention(hidden_size, num_heads)
+         # Attention pooling layers
+         self.attn_pooling_1_to_2 = AttentionPooling(hidden_size)
+         self.attn_pooling_2_to_1 = AttentionPooling(hidden_size)
+         # Projection layer with non-linearity
+         self.projection_layer = nn.Sequential(
+             nn.Linear(hidden_size * 2, hidden_size),
+             nn.ReLU()
+         )
+         # Classifier with three hidden layers
+         self.classifier = nn.Sequential(
+             nn.Linear(hidden_size, hidden_size // 2),
+             nn.ReLU(),
+             nn.Dropout(0.1),
+             nn.Linear(hidden_size // 2, hidden_size // 4),
+             nn.ReLU(),
+             nn.Dropout(0.1),
+             nn.Linear(hidden_size // 4, num_labels)
+         )
+
+     def forward(self, input_ids1, attention_mask1, input_ids2, attention_mask2):
+         # Encode both sentences with the shared encoder
+         outputs1 = self.shared_encoder(input_ids1, attention_mask=attention_mask1)
+         outputs2 = self.shared_encoder(input_ids2, attention_mask=attention_mask2)
+         # Apply sentence-specific adapters
+         embeds1 = self.adapter1(outputs1.last_hidden_state)
+         embeds2 = self.adapter2(outputs2.last_hidden_state)
+         # Transpose to [seq_len, batch_size, hidden_size] for the attention layers
+         embeds1 = embeds1.transpose(0, 1)
+         embeds2 = embeds2.transpose(0, 1)
+         # Cross-attention in both directions
+         cross_attn_1_to_2, _ = self.cross_attention_1_to_2(embeds1, embeds2, embeds2)
+         cross_attn_2_to_1, _ = self.cross_attention_2_to_1(embeds2, embeds1, embeds1)
+         # Attention pooling
+         pooled_1_to_2 = self.attn_pooling_1_to_2(cross_attn_1_to_2)
+         pooled_2_to_1 = self.attn_pooling_2_to_1(cross_attn_2_to_1)
+         # Concatenate and project
+         combined = torch.cat((pooled_1_to_2, pooled_2_to_1), dim=1)
+         projected = self.projection_layer(combined)
+         # Classification
+         logits = self.classifier(projected)
+         return logits
+
+ # The Jina v2 encoder ships custom modeling code, so trust_remote_code=True is required.
+ tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-small-en", trust_remote_code=True)
+ base_model = AutoModel.from_pretrained("jinaai/jina-embeddings-v2-small-en", trust_remote_code=True)
+ off_topic = CrossEncoderWithSharedBase.from_pretrained("mozilla-ai/jina-embeddings-v2-small-en", base_model=base_model)
+
+ # You can then build a predict function that uses the tokenizer.
+ # max_length and device were undefined in the original snippet; the defaults below
+ # are assumptions, so adjust them to match your hardware and the upstream setup.
+ def predict(model, tokenizer, sentence1, sentence2, max_length=512, device="cpu"):
+     model = model.to(device)
+     model.eval()  # disable dropout for deterministic predictions
+     inputs1 = tokenizer(sentence1, return_tensors="pt", truncation=True, padding="max_length", max_length=max_length)
+     inputs2 = tokenizer(sentence2, return_tensors="pt", truncation=True, padding="max_length", max_length=max_length)
+     input_ids1 = inputs1['input_ids'].to(device)
+     attention_mask1 = inputs1['attention_mask'].to(device)
+     input_ids2 = inputs2['input_ids'].to(device)
+     attention_mask2 = inputs2['attention_mask'].to(device)
+
+     # Get outputs
+     with torch.no_grad():
+         outputs = model(input_ids1=input_ids1, attention_mask1=attention_mask1,
+                         input_ids2=input_ids2, attention_mask2=attention_mask2)
+         probabilities = torch.softmax(outputs, dim=1)
+         predicted_label = torch.argmax(probabilities, dim=1).item()
+
+     return predicted_label, probabilities.cpu().numpy()
+ ```
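+
+ Below is a quick usage sketch for the `predict` helper above. The example strings are made up, and the reading of the output label (treating `1` as "off-topic") is an assumption; please confirm the label mapping against the official [model card](https://huggingface.co/govtech/jina-embeddings-v2-small-en-off-topic).
+
+ ```python
+ # Hypothetical example: check whether a user request stays on topic for a system prompt.
+ system_prompt = "You are a customer support assistant for a telecom provider."
+ user_message = "Write me a poem about the ocean."
+
+ label, probabilities = predict(off_topic, tokenizer, system_prompt, user_message)
+ # Assumption: label 1 = off-topic, label 0 = on-topic; verify against the official model card.
+ print(f"Predicted label: {label}")
+ print(f"Class probabilities: {probabilities}")
+ ```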