Spaces: Running on Zero
Commit · c5183c8
1 Parent(s): 491e00d
add a ton of models and update arena dataset

Files changed:
- utils/arena_df.csv +0 -0
- utils/models.py +13 -2
utils/arena_df.csv
CHANGED
The diff for this file is too large to render.
See raw diff
utils/models.py
CHANGED
@@ -18,7 +18,14 @@ models = {
     "Phi-4-mini-instruct": "microsoft/phi-4-mini-instruct",
     #"Cogito-v1-preview-llama-3b": "deepcogito/cogito-v1-preview-llama-3b",
     "IBM Granite-3.3-2b-instruct": "ibm-granite/granite-3.3-2b-instruct",
-    "Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T"
+    #"Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T",
+    "MiniCPM3-RAG-LoRA": "openbmb/MiniCPM3-RAG-LoRA",
+    "Qwen3-0.6b": "qwen/qwen3-0.6b",
+    "Qwen3-1.7b": "qwen/qwen3-1.7b",
+    "Qwen3-4b": "qwen/qwen3-4b",
+    "SmolLM2-1.7b-Instruct": "huggingfacetb/smolllm2-1.7b-instruct",
+    "EXAONE-3.5-2.4B-instruct": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
+    "OLMo-2-1B-Instruct": "allenai/OLMo-2-0425-1B-Instruct",
 
 }
 
@@ -94,6 +101,10 @@ def run_inference(model_name, context, question):
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     result = ""
+    model_kwargs = {}  # make sure qwen3 doesn't use thinking
+    if "qwen3" in model_name.lower():  # Making it case-insensitive and checking for substring
+        print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
+        model_kwargs["enable_thinking"] = False
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", token=True)
@@ -114,10 +125,10 @@ def run_inference(model_name, context, question):
         model=model_name,
         tokenizer=tokenizer,
         device_map='auto',
-        max_length=512,
         do_sample=True,
         temperature=0.6,
         top_p=0.9,
+        model_kwargs=model_kwargs,
     )
 
     text_input = format_rag_prompt(question, context, accepts_sys)
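For context, here is a minimal, self-contained sketch of the inference path this commit touches: the Qwen3 check that fills model_kwargs and the text-generation pipeline call that now receives it. It is an assumed simplification, not the Space's actual utils/models.py; the function name run_inference_sketch, the single prompt argument, and the final generation call are illustrative, and the real run_inference additionally builds its prompt via format_rag_prompt and wraps loading in a try block.

# Illustrative sketch only; assumptions noted above, simplified from the commit's run_inference.
from transformers import AutoTokenizer, pipeline

def run_inference_sketch(model_name: str, prompt: str) -> str:
    # Per-model kwargs; the commit uses this to disable Qwen3 "thinking".
    model_kwargs = {}
    if "qwen3" in model_name.lower():
        # Note: pipeline() forwards model_kwargs to the model's from_pretrained,
        # so this flag is applied at load time rather than per generation call.
        model_kwargs["enable_thinking"] = False

    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
    generator = pipeline(
        "text-generation",
        model=model_name,
        tokenizer=tokenizer,
        device_map="auto",
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        model_kwargs=model_kwargs,
    )
    # max_length=512 was dropped in this commit; capping output with
    # max_new_tokens here is an assumption, not part of the commit.
    out = generator(prompt, max_new_tokens=256, return_full_text=False)
    return out[0]["generated_text"]

Called as run_inference_sketch("qwen/qwen3-0.6b", "What is RAG?"), the Qwen3 branch fires and the flag rides along in model_kwargs; for any other entry in the models dict, model_kwargs stays empty and the pipeline is built exactly as before.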