Update README.md
Browse files
README.md
CHANGED
@@ -100,6 +100,40 @@ To perform inference with the original model files, you’ll first need to insta
|
|
100 |
After installation, download the model weights and use the Scaling inference module to load the
|
101 |
checkpoint, vocabulary, and configuration files.
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
## Bias, Risks, and Limitations
|
104 |
|
105 |
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
|
|
100 |
After installation, download the model weights and use the Scaling inference module to load the
|
101 |
checkpoint, vocabulary, and configuration files.
|
102 |
|
103 |
+
```python
|
104 |
+
from pathlib import Path
|
105 |
+
from torch.nn import CosineSimilarity
|
106 |
+
from scaling.transformer.inference import TransformerInferenceModule
|
107 |
+
MODEL_PATH = "/path/to/model"
|
108 |
+
inference_model = TransformerInferenceModule.from_checkpoint(
|
109 |
+
checkpoint_dir=Path(MODEL_PATH),
|
110 |
+
)
|
111 |
+
# embed the query:
|
112 |
+
query = "Which country is Galileo from?"
|
113 |
+
query_embeddings = inference_model.encode_queries(query, convert_to_tensor=True)
|
114 |
+
print(f"Type of embeddings: {type(query_embeddings)},\n\
|
115 |
+
shape of query embeddings: {query_embeddings.shape}")
|
116 |
+
# embed the documents:
|
117 |
+
document_1 = "Galileo is a German television program series produced and broadcast on ProSieben television network. It is also sold to broadcasters in other countries (namely Russia and Poland). The first show was broadcast in 1998, and is now stored in the Arctic World Archive in Svalbard, Norway, after being transferred to special film created by Piql."
|
118 |
+
document_embeddings_1 = inference_model.encode_corpus(document_1, convert_to_tensor=True)
|
119 |
+
document_2 = "Galileo di Vincenzo Bonaiuti de' Galilei (15 February 1564 - 8 January 1642), commonly referred to as Galileo Galilei or mononymously as Galileo, was an Italian (Florentine) astronomer, physicist and engineer, sometimes described as a polymath. He was born in the city of Pisa, then part of the Duchy of Florence and present-day Italy."
|
120 |
+
document_embeddings_2 = inference_model.encode_corpus(document_2, convert_to_tensor=True)
|
121 |
+
# customized embeddings steering the query:
|
122 |
+
instruction = "Represent the question about TV shows to find a paragraph that answers it."
|
123 |
+
steered_query_embeddings = inference_model.encode_queries(query,
|
124 |
+
instruction=instruction,
|
125 |
+
convert_to_tensor=True)
|
126 |
+
# compute similarity between steered query and both documents
|
127 |
+
cossim = CosineSimilarity(dim=1, eps=1e-6)
|
128 |
+
sim1 = round(cossim(document_embeddings_1, steered_query_embeddings).item(), 3)
|
129 |
+
sim2 = round(cossim(document_embeddings_2, steered_query_embeddings).item(), 3)
|
130 |
+
print("Steered embedding causes higher similarity of query to TV show:")
|
131 |
+
print(f"Similarity query/TV show ({sim1}) > similarity query/Italian polymath: ({sim2})")
|
132 |
+
```
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
## Bias, Risks, and Limitations
|
138 |
|
139 |
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|