Add pipeline tag and link to paper and project page

#2
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +32 -6
README.md CHANGED
@@ -1,15 +1,20 @@
1
  ---
2
- license: apache-2.0
3
  base_model:
4
  - Qwen/Qwen3-4B-Base
5
  library_name: transformers
 
 
6
  ---
 
7
  # Qwen3-Reranker-4B
8
 
9
  <p align="center">
10
  <img src="https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/logo_qwen3.png" width="400"/>
11
  <p>
12
 
 
 
 
13
  ## Highlights
14
 
15
  The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. Building upon the dense foundational models of the Qwen3 series, it provides a comprehensive range of text embeddings and reranking models in various sizes (0.6B, 4B, and 8B). This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.
@@ -63,7 +68,9 @@ from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
63
  def format_instruction(instruction, query, doc):
64
  if instruction is None:
65
  instruction = 'Given a web search query, retrieve relevant passages that answer the query'
66
- output = "<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}".format(instruction=instruction,query=query, doc=doc)
 
 
67
  return output
68
 
69
  def process_inputs(pairs):
@@ -98,8 +105,17 @@ token_false_id = tokenizer.convert_tokens_to_ids("no")
98
  token_true_id = tokenizer.convert_tokens_to_ids("yes")
99
  max_length = 8192
100
 
101
- prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
102
- suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
 
 
 
 
 
 
 
 
 
103
  prefix_tokens = tokenizer.encode(prefix, add_special_tokens=False)
104
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
105
 
@@ -147,7 +163,11 @@ from vllm.inputs.data import TokensPrompt
147
  def format_instruction(instruction, query, doc):
148
  text = [
149
  {"role": "system", "content": "Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\"."},
150
- {"role": "user", "content": f"<Instruct>: {instruction}\n\n<Query>: {query}\n\n<Document>: {doc}"}
 
 
 
 
151
  ]
152
  return text
153
 
@@ -185,7 +205,13 @@ tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Reranker-4B')
185
  model = LLM(model='Qwen/Qwen3-Reranker-4B', tensor_parallel_size=number_of_gpu, max_model_len=10000, enable_prefix_caching=True, gpu_memory_utilization=0.8)
186
  tokenizer.padding_side = "left"
187
  tokenizer.pad_token = tokenizer.eos_token
188
- suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
 
 
 
 
 
 
189
  max_length=8192
190
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
191
  true_token = tokenizer("yes", add_special_tokens=False).input_ids[0]
 
1
  ---
 
2
  base_model:
3
  - Qwen/Qwen3-4B-Base
4
  library_name: transformers
5
+ license: apache-2.0
6
+ pipeline_tag: text-ranking
7
  ---
8
+
9
  # Qwen3-Reranker-4B
10
 
11
  <p align="center">
12
  <img src="https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/logo_qwen3.png" width="400"/>
13
  <p>
14
 
15
+ This repository contains the model described in the paper [Qwen3 Embedding: Advancing Text Embedding and Reranking Through Foundation Models](https://huggingface.co/papers/2506.05176).
16
+ For more information, please check out the [project page](https://qwenlm.github.io/blog/qwen3-embedding/).
17
+
18
  ## Highlights
19
 
20
  The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. Building upon the dense foundational models of the Qwen3 series, it provides a comprehensive range of text embeddings and reranking models in various sizes (0.6B, 4B, and 8B). This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.
 
68
  def format_instruction(instruction, query, doc):
69
  if instruction is None:
70
  instruction = 'Given a web search query, retrieve relevant passages that answer the query'
71
+ output = "<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}".format(instruction=instruction,query=query, doc=doc)
74
  return output
75
 
76
  def process_inputs(pairs):
 
105
  token_true_id = tokenizer.convert_tokens_to_ids("yes")
106
  max_length = 8192
107
 
108
+ prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
+ suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
119
  prefix_tokens = tokenizer.encode(prefix, add_special_tokens=False)
120
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
121
 
 
163
  def format_instruction(instruction, query, doc):
164
  text = [
165
  {"role": "system", "content": "Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\"."},
166
+ {"role": "user", "content": f"<Instruct>: {instruction}\n\n<Query>: {query}\n\n<Document>: {doc}"}
171
  ]
172
  return text
173
 
 
205
  model = LLM(model='Qwen/Qwen3-Reranker-4B', tensor_parallel_size=number_of_gpu, max_model_len=10000, enable_prefix_caching=True, gpu_memory_utilization=0.8)
206
  tokenizer.padding_side = "left"
207
  tokenizer.pad_token = tokenizer.eos_token
208
+ suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
215
  max_length=8192
216
  suffix_tokens = tokenizer.encode(suffix, add_special_tokens=False)
217
  true_token = tokenizer("yes", add_special_tokens=False).input_ids[0]