prithivMLmods committed on
Commit 78fe82f · verified · 1 Parent(s): cd9566f

Add files using upload-large-folder tool

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
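These entries map the Qwen2-VL control tokens (chat markers, vision delimiters, region and padding tokens) to fixed ids at the top of the vocabulary. A minimal sketch of checking the mapping once the files from this commit are downloaded, assuming the repo id prithivMLmods/Open-R1-Mini-Experimental that appears elsewhere in this commit:

# Sketch: verify the added-token ids against added_tokens.json (repo id is an assumption)
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("prithivMLmods/Open-R1-Mini-Experimental")
for tok in ["<|im_start|>", "<|im_end|>", "<|vision_start|>", "<|image_pad|>"]:
    # e.g. <|im_end|> should resolve to 151645, matching the file above
    print(tok, tokenizer.convert_tokens_to_ids(tok))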
chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
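This is the standard Qwen2-VL chat format: a default system prompt, <|im_start|>/<|im_end|> turn markers, and <|vision_start|><|image_pad|><|vision_end|> placeholders where images go. A minimal sketch of rendering it through the processor, assuming the repo id from this commit and a local image path (example.jpg is a placeholder):

# Sketch: render the chat template for one image + text turn (repo id and image path are assumptions)
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("prithivMLmods/Open-R1-Mini-Experimental")
messages = [{"role": "user", "content": [
    {"type": "image", "image": "example.jpg"},        # expands to <|vision_start|><|image_pad|><|vision_end|>
    {"type": "text", "text": "Describe this image."},
]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with "<|im_start|>assistant\n" because add_generation_prompt=True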
config.json ADDED
@@ -0,0 +1,50 @@
+ {
+   "_name_or_path": "..pods/main/resolve/prithivMLmods/Open-R1-Mini-Experimental",
+   "architectures": [
+     "Qwen2VLForConditionalGeneration"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "image_token_id": 151655,
+   "initializer_range": 0.02,
+   "intermediate_size": 8960,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 28,
+   "model_type": "qwen2_vl",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 2,
+   "pad_token_id": 151654,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": {
+     "mrope_section": [
+       16,
+       24,
+       24
+     ],
+     "rope_type": "default",
+     "type": "default"
+   },
+   "rope_theta": 1000000.0,
+   "sliding_window": 32768,
+   "tie_word_embeddings": true,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.48.3",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "video_token_id": 151656,
+   "vision_config": {
+     "hidden_size": 1536,
+     "in_chans": 3,
+     "model_type": "qwen2_vl",
+     "spatial_patch_size": 14,
+     "torch_dtype": "bfloat16"
+   },
+   "vision_end_token_id": 151653,
+   "vision_start_token_id": 151652,
+   "vision_token_id": 151654,
+   "vocab_size": 151936
+ }
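The configuration describes a Qwen2-VL model at roughly the 2B scale: hidden_size 1536, 28 layers, 12 attention heads with 2 key-value heads, mrope-style rope_scaling, and weights saved in bfloat16. A minimal sketch of loading it for inference, assuming a CUDA device is available and using the repo id referenced in this commit:

# Sketch: load the model described by config.json (repo id and CUDA device are assumptions)
import torch
from transformers import AutoConfig, Qwen2VLForConditionalGeneration

config = AutoConfig.from_pretrained("prithivMLmods/Open-R1-Mini-Experimental")
print(config.model_type, config.hidden_size, config.num_hidden_layers)  # qwen2_vl 1536 28

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "prithivMLmods/Open-R1-Mini-Experimental",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in the config
).to("cuda").eval()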
generation_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": [
+     151645,
+     151643
+   ],
+   "max_length": 32768,
+   "pad_token_id": 151654,
+   "temperature": 0.01,
+   "top_k": 1,
+   "top_p": 0.001,
+   "transformers_version": "4.48.3"
+ }
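Although do_sample is true, the defaults here (temperature 0.01, top_k 1, top_p 0.001) make decoding effectively greedy. A minimal sketch of overriding them per call rather than editing the file, assuming a model and tokenized inputs prepared as in the earlier sketches:

# Sketch: override the repo's near-greedy generation defaults at call time (model and inputs are assumed)
output_ids = model.generate(
    **inputs,                # prompt tensors from the processor
    max_new_tokens=512,      # bound new tokens instead of relying on max_length=32768
    do_sample=True,
    temperature=0.7,         # looser than the default 0.01
    top_p=0.9,
)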
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea84053aa898caf01ee6acc03dd464eac7edb97c5aa97e18538b3a04616a1525
+ size 4418050848
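The weights themselves are stored through Git LFS; this pointer records only the sha256 and size (about 4.4 GB) of the real model.safetensors. A minimal sketch of downloading the file and checking it against the pointer, assuming the repo id from this commit:

# Sketch: fetch the weights and verify them against the LFS pointer (repo id is an assumption)
import hashlib
from huggingface_hub import hf_hub_download

path = hf_hub_download("prithivMLmods/Open-R1-Mini-Experimental", "model.safetensors")
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks instead of loading 4.4 GB at once
        digest.update(chunk)
print(digest.hexdigest() == "ea84053aa898caf01ee6acc03dd464eac7edb97c5aa97e18538b3a04616a1525")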
open-r1-reasoner-doc-py/open-r1-exp.ipynb ADDED
@@ -0,0 +1,343 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-b4-SW1aGOcF"
+ },
+ "source": [
+ "# **Open R1 Reasoning Exp**\n",
+ "\n",
+ "Qwen2VLForConditionalGeneration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "oDmd1ZObGSel"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install gradio spaces transformers accelerate numpy requests torch torchvision qwen-vl-utils av ipython reportlab fpdf python-docx pillow huggingface_hub"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ovBSsRFhGbs2"
+ },
+ "outputs": [],
+ "source": [
+ "# Authenticate with Hugging Face\n",
+ "from huggingface_hub import login\n",
+ "\n",
+ "# Log in to Hugging Face using the provided token\n",
+ "hf_token = '---xxxxx---'\n",
+ "login(hf_token)\n",
+ "\n",
+ "#Demo\n",
+ "import gradio as gr\n",
+ "import spaces\n",
+ "from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer\n",
+ "from qwen_vl_utils import process_vision_info\n",
+ "import torch\n",
+ "from PIL import Image\n",
+ "import os\n",
+ "import uuid\n",
+ "import io\n",
+ "from threading import Thread\n",
+ "from reportlab.lib.pagesizes import A4\n",
+ "from reportlab.lib.styles import getSampleStyleSheet\n",
+ "from reportlab.lib import colors\n",
+ "from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer\n",
+ "from reportlab.lib.units import inch\n",
+ "from reportlab.pdfbase import pdfmetrics\n",
+ "from reportlab.pdfbase.ttfonts import TTFont\n",
+ "import docx\n",
+ "from docx.enum.text import WD_ALIGN_PARAGRAPH\n",
+ "\n",
+ "# Define model options\n",
+ "MODEL_OPTIONS = {\n",
+ "    \"OpenR1\": \"prithivMLmods/Open-R1-Mini-Experimental\",\n",
+ "}\n",
+ "\n",
+ "# Preload models and processors into CUDA\n",
+ "models = {}\n",
+ "processors = {}\n",
+ "for name, model_id in MODEL_OPTIONS.items():\n",
+ "    print(f\"Loading {name}...\")\n",
+ "    models[name] = Qwen2VLForConditionalGeneration.from_pretrained(\n",
+ "        model_id,\n",
+ "        trust_remote_code=True,\n",
+ "        torch_dtype=torch.float16\n",
+ "    ).to(\"cuda\").eval()\n",
+ "    processors[name] = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)\n",
+ "\n",
+ "image_extensions = Image.registered_extensions()\n",
+ "\n",
+ "def identify_and_save_blob(blob_path):\n",
+ "    \"\"\"Identifies if the blob is an image and saves it.\"\"\"\n",
+ "    try:\n",
+ "        with open(blob_path, 'rb') as file:\n",
+ "            blob_content = file.read()\n",
+ "            try:\n",
+ "                Image.open(io.BytesIO(blob_content)).verify()  # Check if it's a valid image\n",
+ "                extension = \".png\"  # Default to PNG for saving\n",
+ "                media_type = \"image\"\n",
+ "            except (IOError, SyntaxError):\n",
+ "                raise ValueError(\"Unsupported media type. Please upload a valid image.\")\n",
+ "\n",
+ "            filename = f\"temp_{uuid.uuid4()}_media{extension}\"\n",
+ "            with open(filename, \"wb\") as f:\n",
+ "                f.write(blob_content)\n",
+ "\n",
+ "            return filename, media_type\n",
+ "\n",
+ "    except FileNotFoundError:\n",
+ "        raise ValueError(f\"The file {blob_path} was not found.\")\n",
+ "    except Exception as e:\n",
+ "        raise ValueError(f\"An error occurred while processing the file: {e}\")\n",
+ "\n",
+ "@spaces.GPU\n",
+ "def qwen_inference(model_name, media_input, text_input=None):\n",
+ "    \"\"\"Handles inference for the selected model.\"\"\"\n",
+ "    model = models[model_name]\n",
+ "    processor = processors[model_name]\n",
+ "\n",
+ "    if isinstance(media_input, str):\n",
+ "        media_path = media_input\n",
+ "        if media_path.endswith(tuple([i for i in image_extensions.keys()])):\n",
+ "            media_type = \"image\"\n",
+ "        else:\n",
+ "            try:\n",
+ "                media_path, media_type = identify_and_save_blob(media_input)\n",
+ "            except Exception as e:\n",
+ "                raise ValueError(\"Unsupported media type. Please upload a valid image.\")\n",
+ "\n",
+ "    messages = [\n",
+ "        {\n",
+ "            \"role\": \"user\",\n",
+ "            \"content\": [\n",
+ "                {\n",
+ "                    \"type\": media_type,\n",
+ "                    media_type: media_path\n",
+ "                },\n",
+ "                {\"type\": \"text\", \"text\": text_input},\n",
+ "            ],\n",
+ "        }\n",
+ "    ]\n",
+ "\n",
+ "    text = processor.apply_chat_template(\n",
+ "        messages, tokenize=False, add_generation_prompt=True\n",
+ "    )\n",
+ "    image_inputs, _ = process_vision_info(messages)\n",
+ "    inputs = processor(\n",
+ "        text=[text],\n",
+ "        images=image_inputs,\n",
+ "        padding=True,\n",
+ "        return_tensors=\"pt\",\n",
+ "    ).to(\"cuda\")\n",
+ "\n",
+ "    streamer = TextIteratorStreamer(\n",
+ "        processor.tokenizer, skip_prompt=True, skip_special_tokens=True\n",
+ "    )\n",
+ "    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)\n",
+ "\n",
+ "    thread = Thread(target=model.generate, kwargs=generation_kwargs)\n",
+ "    thread.start()\n",
+ "\n",
+ "    buffer = \"\"\n",
+ "    for new_text in streamer:\n",
+ "        buffer += new_text\n",
+ "        # Remove <|im_end|> or similar tokens from the output\n",
+ "        buffer = buffer.replace(\"<|im_end|>\", \"\")\n",
+ "        yield buffer\n",
+ "\n",
+ "def format_plain_text(output_text):\n",
+ "    \"\"\"Formats the output text as plain text without LaTeX delimiters.\"\"\"\n",
+ "    # Remove LaTeX delimiters and convert to plain text\n",
+ "    plain_text = output_text.replace(\"\\\\(\", \"\").replace(\"\\\\)\", \"\").replace(\"\\\\[\", \"\").replace(\"\\\\]\", \"\")\n",
+ "    return plain_text\n",
+ "\n",
+ "def generate_document(media_path, output_text, file_format, font_size, line_spacing, alignment, image_size):\n",
+ "    \"\"\"Generates a document with the input image and plain text output.\"\"\"\n",
+ "    plain_text = format_plain_text(output_text)\n",
+ "    if file_format == \"pdf\":\n",
+ "        return generate_pdf(media_path, plain_text, font_size, line_spacing, alignment, image_size)\n",
+ "    elif file_format == \"docx\":\n",
+ "        return generate_docx(media_path, plain_text, font_size, line_spacing, alignment, image_size)\n",
+ "\n",
+ "def generate_pdf(media_path, plain_text, font_size, line_spacing, alignment, image_size):\n",
+ "    \"\"\"Generates a PDF document.\"\"\"\n",
+ "    filename = f\"output_{uuid.uuid4()}.pdf\"\n",
+ "    doc = SimpleDocTemplate(\n",
+ "        filename,\n",
+ "        pagesize=A4,\n",
+ "        rightMargin=inch,\n",
+ "        leftMargin=inch,\n",
+ "        topMargin=inch,\n",
+ "        bottomMargin=inch\n",
+ "    )\n",
+ "    styles = getSampleStyleSheet()\n",
+ "    styles[\"Normal\"].fontSize = int(font_size)\n",
+ "    styles[\"Normal\"].leading = int(font_size) * line_spacing\n",
+ "    styles[\"Normal\"].alignment = {\n",
+ "        \"Left\": 0,\n",
+ "        \"Center\": 1,\n",
+ "        \"Right\": 2,\n",
+ "        \"Justified\": 4\n",
+ "    }[alignment]\n",
+ "\n",
+ "    story = []\n",
+ "\n",
+ "    # Add image with size adjustment\n",
+ "    image_sizes = {\n",
+ "        \"Small\": (200, 200),\n",
+ "        \"Medium\": (400, 400),\n",
+ "        \"Large\": (600, 600)\n",
+ "    }\n",
+ "    img = RLImage(media_path, width=image_sizes[image_size][0], height=image_sizes[image_size][1])\n",
+ "    story.append(img)\n",
+ "    story.append(Spacer(1, 12))\n",
+ "\n",
+ "    # Add plain text output\n",
+ "    text = Paragraph(plain_text, styles[\"Normal\"])\n",
+ "    story.append(text)\n",
+ "\n",
+ "    doc.build(story)\n",
+ "    return filename\n",
+ "\n",
+ "def generate_docx(media_path, plain_text, font_size, line_spacing, alignment, image_size):\n",
+ "    \"\"\"Generates a DOCX document.\"\"\"\n",
+ "    filename = f\"output_{uuid.uuid4()}.docx\"\n",
+ "    doc = docx.Document()\n",
+ "\n",
+ "    # Add image with size adjustment\n",
+ "    image_sizes = {\n",
+ "        \"Small\": docx.shared.Inches(2),\n",
+ "        \"Medium\": docx.shared.Inches(4),\n",
+ "        \"Large\": docx.shared.Inches(6)\n",
+ "    }\n",
+ "    doc.add_picture(media_path, width=image_sizes[image_size])\n",
+ "    doc.add_paragraph()\n",
+ "\n",
+ "    # Add plain text output\n",
+ "    paragraph = doc.add_paragraph()\n",
+ "    paragraph.paragraph_format.line_spacing = line_spacing\n",
+ "    paragraph.paragraph_format.alignment = {\n",
+ "        \"Left\": WD_ALIGN_PARAGRAPH.LEFT,\n",
+ "        \"Center\": WD_ALIGN_PARAGRAPH.CENTER,\n",
+ "        \"Right\": WD_ALIGN_PARAGRAPH.RIGHT,\n",
+ "        \"Justified\": WD_ALIGN_PARAGRAPH.JUSTIFY\n",
+ "    }[alignment]\n",
+ "    run = paragraph.add_run(plain_text)\n",
+ "    run.font.size = docx.shared.Pt(int(font_size))\n",
+ "\n",
+ "    doc.save(filename)\n",
+ "    return filename\n",
+ "\n",
+ "# CSS for output styling\n",
+ "css = \"\"\"\n",
+ "  #output {\n",
+ "    height: 500px;\n",
+ "    overflow: auto;\n",
+ "    border: 1px solid #ccc;\n",
+ "  }\n",
+ ".submit-btn {\n",
+ "    background-color: #cf3434 !important;\n",
+ "    color: white !important;\n",
+ "}\n",
+ ".submit-btn:hover {\n",
+ "    background-color: #ff2323 !important;\n",
+ "}\n",
+ ".download-btn {\n",
+ "    background-color: #35a6d6 !important;\n",
+ "    color: white !important;\n",
+ "}\n",
+ ".download-btn:hover {\n",
+ "    background-color: #22bcff !important;\n",
+ "}\n",
+ "\"\"\"\n",
+ "\n",
+ "# Gradio app setup\n",
+ "with gr.Blocks(css=css) as demo:\n",
+ "    gr.Markdown(\"# ChemQwen Chemical Identifier\")\n",
+ "\n",
+ "    with gr.Tab(label=\"Image Input\"):\n",
+ "\n",
+ "        with gr.Row():\n",
+ "            with gr.Column():\n",
+ "                model_choice = gr.Dropdown(\n",
+ "                    label=\"Model Selection\",\n",
+ "                    choices=list(MODEL_OPTIONS.keys()),\n",
+ "                    value=\"OpenR1\"\n",
+ "                )\n",
+ "                input_media = gr.File(\n",
+ "                    label=\"Upload Image\", type=\"filepath\"\n",
+ "                )\n",
+ "                text_input = gr.Textbox(label=\"Question\", placeholder=\"Ask a question about the image...\")\n",
+ "                submit_btn = gr.Button(value=\"Submit\", elem_classes=\"submit-btn\")\n",
+ "\n",
+ "            with gr.Column():\n",
+ "                output_text = gr.Textbox(label=\"Output Text\", lines=10)\n",
+ "                plain_text_output = gr.Textbox(label=\"Standardized Plain Text\", lines=10)\n",
+ "\n",
+ "        submit_btn.click(\n",
+ "            qwen_inference, [model_choice, input_media, text_input], [output_text]\n",
+ "        ).then(\n",
+ "            lambda output_text: format_plain_text(output_text), [output_text], [plain_text_output]\n",
+ "        )\n",
+ "\n",
+ "        # Add examples directly usable by clicking\n",
+ "        with gr.Row():\n",
+ "            with gr.Column():\n",
+ "                line_spacing = gr.Dropdown(\n",
+ "                    choices=[0.5, 1.0, 1.15, 1.5, 2.0, 2.5, 3.0],\n",
+ "                    value=1.5,\n",
+ "                    label=\"Line Spacing\"\n",
+ "                )\n",
+ "                font_size = gr.Dropdown(\n",
+ "                    choices=[\"8\", \"10\", \"12\", \"14\", \"16\", \"18\", \"20\", \"22\", \"24\"],\n",
+ "                    value=\"18\",\n",
+ "                    label=\"Font Size\"\n",
+ "                )\n",
+ "                alignment = gr.Dropdown(\n",
+ "                    choices=[\"Left\", \"Center\", \"Right\", \"Justified\"],\n",
+ "                    value=\"Justified\",\n",
+ "                    label=\"Text Alignment\"\n",
+ "                )\n",
+ "                image_size = gr.Dropdown(\n",
+ "                    choices=[\"Small\", \"Medium\", \"Large\"],\n",
+ "                    value=\"Small\",\n",
+ "                    label=\"Image Size\"\n",
+ "                )\n",
+ "                file_format = gr.Radio([\"pdf\", \"docx\"], label=\"File Format\", value=\"pdf\")\n",
+ "                get_document_btn = gr.Button(value=\"Get Document\", elem_classes=\"download-btn\")\n",
+ "\n",
+ "        get_document_btn.click(\n",
+ "            generate_document, [input_media, output_text, file_format, font_size, line_spacing, alignment, image_size], gr.File(label=\"Download Document\")\n",
+ "        )\n",
+ "\n",
+ "demo.launch(debug=True)"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "Qwen2VLImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "max_pixels": 12845056,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "max_pixels": 12845056,
+     "min_pixels": 3136
+   },
+   "temporal_patch_size": 2
+ }
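The image processor keeps each image's pixel count between min_pixels (3136 = 56x56) and max_pixels (12845056), then cuts it into 14x14 patches merged 2x2. A minimal sketch of loading the processor and narrowing that budget, assuming the repo id from this commit; the min_pixels/max_pixels override shown is the pattern documented for Qwen2-VL processors:

# Sketch: load the processor and optionally narrow the pixel budget (repo id is an assumption)
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "prithivMLmods/Open-R1-Mini-Experimental",
    min_pixels=256 * 28 * 28,    # optional overrides of the defaults in preprocessor_config.json
    max_pixels=1280 * 28 * 28,
)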
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|vision_pad|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:948c45c29a91dd2e6ae77d6f5a324a3d408bcca6ad443365b2e79986f1422771
3
+ size 11420540
tokenizer_config.json ADDED
@@ -0,0 +1,145 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151646": {
+       "content": "<|object_ref_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151647": {
+       "content": "<|object_ref_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151648": {
+       "content": "<|box_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151649": {
+       "content": "<|box_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151650": {
+       "content": "<|quad_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151651": {
+       "content": "<|quad_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151652": {
+       "content": "<|vision_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151653": {
+       "content": "<|vision_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151654": {
+       "content": "<|vision_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151655": {
+       "content": "<|image_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151656": {
+       "content": "<|video_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "bos_token": null,
+   "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 32768,
+   "pad_token": "<|vision_pad|>",
+   "padding_side": "right",
+   "processor_class": "Qwen2VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
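The tokenizer config registers the same special tokens as added_tokens.json, sets <|im_end|> as the EOS token and <|vision_pad|> as the (right-side) padding token, and embeds the chat template again for tokenizer-only use. A minimal sketch of confirming these settings, assuming the repo id from this commit:

# Sketch: confirm the settings declared in tokenizer_config.json (repo id is an assumption)
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("prithivMLmods/Open-R1-Mini-Experimental")
print(tokenizer.eos_token, tokenizer.pad_token, tokenizer.padding_side)  # <|im_end|> <|vision_pad|> right
print(tokenizer.model_max_length)  # 32768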
vocab.json ADDED
The diff for this file is too large to render. See raw diff