from transformers import AutoModelForSequenceClassification
import torch

model_name = "OpenBMB/MiniCPM-Reranker-Light"
model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).to("cuda")
# Optionally, use FlashAttention-2 to speed up inference:
# model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda")
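# (Assumption: the FlashAttention-2 path requires the separate `flash-attn` package,
#  e.g. `pip install flash-attn --no-build-isolation`, and a supported GPU.)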
model.eval()

query = "中国的首都是哪里?"  # "Where is the capital of China?"
passages = ["beijing", "shanghai"]  # 北京, 上海 (Beijing, Shanghai)

rerank_score = model.rerank(query, passages, query_instruction="Query:", batch_size=32, max_length=1024)
print(rerank_score)  # [0.01791382 0.00024533]

# Alternatively, score explicit (instructed query, passage) pairs directly.
sentence_pairs = [[f"Query: {query}", doc] for doc in passages]
scores = model.compute_score(sentence_pairs, batch_size=32, max_length=1024)
print(scores)  # [0.01791382 0.00024533]
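
# The returned scores align index-wise with `passages`, so ranking is just a sort.
# Minimal sketch (assumption: `rerank_score` is an array-like of floats, as the
# printed output above suggests).
ranked = sorted(zip(passages, rerank_score), key=lambda pair: pair[1], reverse=True)
for passage, score in ranked:
    print(f"{score:.6f}\t{passage}")  # best match first, e.g. "beijing" before "shanghai"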