peralp24 committed on
Commit
7c6354e
·
verified ·
1 Parent(s): b04675c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -0
README.md CHANGED
@@ -100,6 +100,40 @@ To perform inference with the original model files, you’ll first need to insta
100
  After installation, download the model weights and use the Scaling inference module to load the
101
  checkpoint, vocabulary, and configuration files.
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  ## Bias, Risks, and Limitations
104
 
105
  <!-- This section is meant to convey both technical and sociotechnical limitations. -->
 
100
  After installation, download the model weights and use the Scaling inference module to load the
101
  checkpoint, vocabulary, and configuration files.
102
 
103
+ ```python
104
+ from pathlib import Path
105
+ from torch.nn import CosineSimilarity
106
+ from scaling.transformer.inference import TransformerInferenceModule
107
+ MODEL_PATH = "/path/to/model"
108
+ inference_model = TransformerInferenceModule.from_checkpoint(
109
+ checkpoint_dir=Path(MODEL_PATH),
110
+ )
111
+ # embed the query:
112
+ query = "Which country is Galileo from?"
113
+ query_embeddings = inference_model.encode_queries(query, convert_to_tensor=True)
114
+ print(f"Type of embeddings: {type(query_embeddings)},\n\
115
+ shape of query embeddings: {query_embeddings.shape}")
116
+ # embed the documents:
117
+ document_1 = "Galileo is a German television program series produced and broadcast on ProSieben television network. It is also sold to broadcasters in other countries (namely Russia and Poland). The first show was broadcast in 1998, and is now stored in the Arctic World Archive in Svalbard, Norway, after being transferred to special film created by Piql."
118
+ document_embeddings_1 = inference_model.encode_corpus(document_1, convert_to_tensor=True)
119
+ document_2 = "Galileo di Vincenzo Bonaiuti de' Galilei (15 February 1564 - 8 January 1642), commonly referred to as Galileo Galilei or mononymously as Galileo, was an Italian (Florentine) astronomer, physicist and engineer, sometimes described as a polymath. He was born in the city of Pisa, then part of the Duchy of Florence and present-day Italy."
120
+ document_embeddings_2 = inference_model.encode_corpus(document_2, convert_to_tensor=True)
121
+ # customized embeddings steering the query:
122
+ instruction = "Represent the question about TV shows to find a paragraph that answers it."
123
+ steered_query_embeddings = inference_model.encode_queries(query,
124
+ instruction=instruction,
125
+ convert_to_tensor=True)
126
+ # compute similarity between steered query and both documents
127
+ cossim = CosineSimilarity(dim=1, eps=1e-6)
128
+ sim1 = round(cossim(document_embeddings_1, steered_query_embeddings).item(), 3)
129
+ sim2 = round(cossim(document_embeddings_2, steered_query_embeddings).item(), 3)
130
+ print("Steered embedding causes higher similarity of query to TV show:")
131
+ print(f"Similarity query/TV show ({sim1}) > similarity query/Italian polymath: ({sim2})")
132
+ ```
133
+
134
+
135
+
136
+
137
  ## Bias, Risks, and Limitations
138
 
139
  <!-- This section is meant to convey both technical and sociotechnical limitations. -->