hayas committed on
Commit
feb8b85
·
1 Parent(s): 9d8df0c
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.pre-commit-config.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: check-executables-have-shebangs
6
+ - id: check-json
7
+ - id: check-merge-conflict
8
+ - id: check-shebang-scripts-are-executable
9
+ - id: check-toml
10
+ - id: check-yaml
11
+ - id: end-of-file-fixer
12
+ - id: mixed-line-ending
13
+ args: ["--fix=lf"]
14
+ - id: requirements-txt-fixer
15
+ - id: trailing-whitespace
16
+ - repo: https://github.com/astral-sh/ruff-pre-commit
17
+ rev: v0.11.0
18
+ hooks:
19
+ - id: ruff
20
+ args: ["--fix"]
21
+ - id: ruff-format
22
+ - repo: https://github.com/pre-commit/mirrors-mypy
23
+ rev: v1.15.0
24
+ hooks:
25
+ - id: mypy
26
+ args: ["--ignore-missing-imports"]
27
+ additional_dependencies:
28
+ [
29
+ "types-python-slugify",
30
+ "types-pytz",
31
+ "types-PyYAML",
32
+ "types-requests",
33
+ ]
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10
.vscode/extensions.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "recommendations": [
3
+ "ms-python.python",
4
+ "charliermarsh.ruff",
5
+ "streetsidesoftware.code-spell-checker",
6
+ "tamasfe.even-better-toml"
7
+ ]
8
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "files.insertFinalNewline": false,
4
+ "[python]": {
5
+ "editor.defaultFormatter": "charliermarsh.ruff",
6
+ "editor.formatOnType": true,
7
+ "editor.codeActionsOnSave": {
8
+ "source.fixAll.ruff": "explicit",
9
+ "source.organizeImports": "explicit"
10
+ }
11
+ },
12
+ "[jupyter]": {
13
+ "files.insertFinalNewline": false
14
+ },
15
+ "notebook.output.scrolling": true,
16
+ "notebook.formatOnSave.enabled": true
17
+ }
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Sarashina2 Vision 14b
3
- emoji: 👁
4
- colorFrom: pink
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.21.0
8
  app_file: app.py
 
1
  ---
2
+ title: Sarashina2 Vision 14B
3
+ emoji:
4
+ colorFrom: red
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.21.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python

from collections.abc import Iterator
from threading import Thread

import gradio as gr
import PIL.Image
import spaces
from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer

# Hugging Face Hub repository of the Sarashina2-Vision 14B vision-language model.
model_id = "sbintuitions/sarashina2-vision-14b"
# trust_remote_code=True is required: the processor and model classes are
# defined by custom code shipped inside the model repository, not transformers.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
# torch_dtype="auto" picks the checkpoint's native precision; device_map="cuda"
# places the full model on the GPU (this Space runs under @spaces.GPU).
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cuda", trust_remote_code=True)
14
+
15
+
16
@spaces.GPU
def run(
    message: dict,
    history: list[dict],
    max_new_tokens: int = 256,
) -> Iterator[str]:
    """Stream a model reply for one chat turn.

    Args:
        message: Gradio multimodal message with "text" and "files" keys.
        history: Prior turns in Gradio "messages" format; file attachments
            appear as entries whose "content" is a tuple of file paths.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The accumulated response text, growing as tokens stream in.
    """
    # Exactly one image per conversation: required on the first turn,
    # rejected on every later turn.
    if not history and not message["files"]:
        gr.Warning("Please upload an image.")
        yield ""
        return
    if history and message["files"]:
        gr.Warning("Only one image is allowed.")
        yield ""
        return

    image = None
    if not history:
        image = PIL.Image.open(message["files"][0])
    messages = []
    for past_message in history:
        content = past_message["content"]
        if isinstance(content, tuple):
            # File attachments in history are (path,) tuples — recover the
            # conversation's image from the first turn.
            image = PIL.Image.open(content[0])
        else:
            messages.append({"role": past_message["role"], "content": content})
    if image is None:
        # History exists but holds no image attachment (e.g. it was edited
        # away); without this guard the processor call below would raise
        # NameError on an unbound `image`.
        gr.Warning("Please upload an image.")
        yield ""
        return
    messages.append({"role": "user", "content": message["text"]})

    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        text=[text_prompt],
        images=[image],
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    # Generate on a worker thread; the streamer hands tokens back here.
    streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        # NOTE: temperature is intentionally not set — greedy decoding
        # (do_sample=False) ignores it and transformers warns if both are given.
        do_sample=False,
        stopping_criteria=processor.get_stopping_criteria(["\n###"]),
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    output = ""
    for delta in streamer:
        output += delta
        yield output
    # The streamer is exhausted only after generation finishes, so this
    # join returns promptly and cleans up the worker thread.
    t.join()
67
+
68
+
69
# One clickable example (prompt + bundled sample image) shown under the chat box.
EXAMPLES = [
    [
        {
            "text": "この写真に写っているもので、最も有名と考えられる建築物は何でどこに写っていますか?",
            "files": ["assets/sample.jpg"],
        }
    ],
]

# Multimodal chat UI: a single image per conversation plus free-form text.
demo = gr.ChatInterface(
    run,
    type="messages",
    title="sbintuitions/sarashina2-vision-14b",
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_types=["image"], file_count="single"),
    additional_inputs=[gr.Slider(label="Max new tokens", minimum=10, maximum=1024, step=1, value=256)],
    examples=EXAMPLES,
    cache_examples=False,
    run_examples_on_click=False,
    css_paths="style.css",
)

if __name__ == "__main__":
    demo.launch()
assets/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 SB Intuitions
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
assets/README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ The image in this directory is copied from https://huggingface.co/sbintuitions/sarashina2-vision-14b/resolve/main/sample.jpg.
2
+ The original content is licensed under the MIT License. See the LICENSE file for details.
assets/sample.jpg ADDED

Git LFS Details

  • SHA256: fec4aaeb7320998e81ab2ae24e6568db9d0fd8d108a19daf2d4107c899e71d32
  • Pointer size: 132 Bytes
  • Size of remote file: 2.51 MB
pyproject.toml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "sarashina2-vision-14b"
3
+ version = "0.1.0"
4
+ description = ""
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = [
8
+ "accelerate>=1.5.2",
9
+ "gradio>=5.21.0",
10
+ "hf-transfer>=0.1.9",
11
+ "protobuf>=6.30.1",
12
+ "sentencepiece>=0.2.0",
13
+ "spaces>=0.32.0",
14
+ "torch==2.4.0",
15
+ "torchvision>=0.19.0",
16
+ "transformers==4.47.0",
17
+ ]
18
+
19
+ [tool.ruff]
20
+ line-length = 119
21
+
22
+ [tool.ruff.lint]
23
+ select = ["ALL"]
24
+ ignore = [
25
+ "COM812", # missing-trailing-comma
26
+ "D203", # one-blank-line-before-class
27
+ "D213", # multi-line-summary-second-line
28
+ "E501", # line-too-long
29
+ "SIM117", # multiple-with-statements
30
+ #
31
+ "D100", # undocumented-public-module
32
+ "D101", # undocumented-public-class
33
+ "D102", # undocumented-public-method
34
+ "D103", # undocumented-public-function
35
+ "D104", # undocumented-public-package
36
+ "D105", # undocumented-magic-method
37
+ "D107", # undocumented-public-init
38
+ "EM101", # raw-string-in-exception
39
+ "FBT001", # boolean-type-hint-positional-argument
40
+ "FBT002", # boolean-default-value-positional-argument
41
+ "PD901", # pandas-df-variable-name
42
+ "PGH003", # blanket-type-ignore
43
+ "PLR0913", # too-many-arguments
44
+ "PLR0915", # too-many-statements
45
+ "TRY003", # raise-vanilla-args
46
+ ]
47
+ unfixable = [
48
+ "F401", # unused-import
49
+ ]
50
+
51
+ [tool.ruff.lint.pydocstyle]
52
+ convention = "google"
53
+
54
+ [tool.ruff.lint.per-file-ignores]
55
+ "*.ipynb" = ["T201", "T203"]
56
+
57
+ [tool.ruff.format]
58
+ docstring-code-format = true
requirements.txt ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o requirements.txt
3
+ accelerate==1.5.2
4
+ # via sarashina2-vision-14b (pyproject.toml)
5
+ aiofiles==23.2.1
6
+ # via gradio
7
+ annotated-types==0.7.0
8
+ # via pydantic
9
+ anyio==4.9.0
10
+ # via
11
+ # gradio
12
+ # httpx
13
+ # starlette
14
+ certifi==2025.1.31
15
+ # via
16
+ # httpcore
17
+ # httpx
18
+ # requests
19
+ charset-normalizer==3.4.1
20
+ # via requests
21
+ click==8.1.8
22
+ # via
23
+ # typer
24
+ # uvicorn
25
+ exceptiongroup==1.2.2
26
+ # via anyio
27
+ fastapi==0.115.11
28
+ # via gradio
29
+ ffmpy==0.5.0
30
+ # via gradio
31
+ filelock==3.18.0
32
+ # via
33
+ # huggingface-hub
34
+ # torch
35
+ # transformers
36
+ # triton
37
+ fsspec==2025.3.0
38
+ # via
39
+ # gradio-client
40
+ # huggingface-hub
41
+ # torch
42
+ gradio==5.21.0
43
+ # via
44
+ # sarashina2-vision-14b (pyproject.toml)
45
+ # spaces
46
+ gradio-client==1.7.2
47
+ # via gradio
48
+ groovy==0.1.2
49
+ # via gradio
50
+ h11==0.14.0
51
+ # via
52
+ # httpcore
53
+ # uvicorn
54
+ hf-transfer==0.1.9
55
+ # via sarashina2-vision-14b (pyproject.toml)
56
+ httpcore==1.0.7
57
+ # via httpx
58
+ httpx==0.28.1
59
+ # via
60
+ # gradio
61
+ # gradio-client
62
+ # safehttpx
63
+ # spaces
64
+ huggingface-hub==0.29.3
65
+ # via
66
+ # accelerate
67
+ # gradio
68
+ # gradio-client
69
+ # tokenizers
70
+ # transformers
71
+ idna==3.10
72
+ # via
73
+ # anyio
74
+ # httpx
75
+ # requests
76
+ jinja2==3.1.6
77
+ # via
78
+ # gradio
79
+ # torch
80
+ markdown-it-py==3.0.0
81
+ # via rich
82
+ markupsafe==2.1.5
83
+ # via
84
+ # gradio
85
+ # jinja2
86
+ mdurl==0.1.2
87
+ # via markdown-it-py
88
+ mpmath==1.3.0
89
+ # via sympy
90
+ networkx==3.4.2
91
+ # via torch
92
+ numpy==2.2.4
93
+ # via
94
+ # accelerate
95
+ # gradio
96
+ # pandas
97
+ # torchvision
98
+ # transformers
99
+ nvidia-cublas-cu12==12.1.3.1
100
+ # via
101
+ # nvidia-cudnn-cu12
102
+ # nvidia-cusolver-cu12
103
+ # torch
104
+ nvidia-cuda-cupti-cu12==12.1.105
105
+ # via torch
106
+ nvidia-cuda-nvrtc-cu12==12.1.105
107
+ # via torch
108
+ nvidia-cuda-runtime-cu12==12.1.105
109
+ # via torch
110
+ nvidia-cudnn-cu12==9.1.0.70
111
+ # via torch
112
+ nvidia-cufft-cu12==11.0.2.54
113
+ # via torch
114
+ nvidia-curand-cu12==10.3.2.106
115
+ # via torch
116
+ nvidia-cusolver-cu12==11.4.5.107
117
+ # via torch
118
+ nvidia-cusparse-cu12==12.1.0.106
119
+ # via
120
+ # nvidia-cusolver-cu12
121
+ # torch
122
+ nvidia-nccl-cu12==2.20.5
123
+ # via torch
124
+ nvidia-nvjitlink-cu12==12.8.93
125
+ # via
126
+ # nvidia-cusolver-cu12
127
+ # nvidia-cusparse-cu12
128
+ nvidia-nvtx-cu12==12.1.105
129
+ # via torch
130
+ orjson==3.10.15
131
+ # via gradio
132
+ packaging==24.2
133
+ # via
134
+ # accelerate
135
+ # gradio
136
+ # gradio-client
137
+ # huggingface-hub
138
+ # spaces
139
+ # transformers
140
+ pandas==2.2.3
141
+ # via gradio
142
+ pillow==11.1.0
143
+ # via
144
+ # gradio
145
+ # torchvision
146
+ protobuf==6.30.1
147
+ # via sarashina2-vision-14b (pyproject.toml)
148
+ psutil==5.9.8
149
+ # via
150
+ # accelerate
151
+ # spaces
152
+ pydantic==2.10.6
153
+ # via
154
+ # fastapi
155
+ # gradio
156
+ # spaces
157
+ pydantic-core==2.27.2
158
+ # via pydantic
159
+ pydub==0.25.1
160
+ # via gradio
161
+ pygments==2.19.1
162
+ # via rich
163
+ python-dateutil==2.9.0.post0
164
+ # via pandas
165
+ python-multipart==0.0.20
166
+ # via gradio
167
+ pytz==2025.1
168
+ # via pandas
169
+ pyyaml==6.0.2
170
+ # via
171
+ # accelerate
172
+ # gradio
173
+ # huggingface-hub
174
+ # transformers
175
+ regex==2024.11.6
176
+ # via transformers
177
+ requests==2.32.3
178
+ # via
179
+ # huggingface-hub
180
+ # spaces
181
+ # transformers
182
+ rich==13.9.4
183
+ # via typer
184
+ ruff==0.11.0
185
+ # via gradio
186
+ safehttpx==0.1.6
187
+ # via gradio
188
+ safetensors==0.5.3
189
+ # via
190
+ # accelerate
191
+ # transformers
192
+ semantic-version==2.10.0
193
+ # via gradio
194
+ sentencepiece==0.2.0
195
+ # via sarashina2-vision-14b (pyproject.toml)
196
+ shellingham==1.5.4
197
+ # via typer
198
+ six==1.17.0
199
+ # via python-dateutil
200
+ sniffio==1.3.1
201
+ # via anyio
202
+ spaces==0.32.0
203
+ # via sarashina2-vision-14b (pyproject.toml)
204
+ starlette==0.46.1
205
+ # via
206
+ # fastapi
207
+ # gradio
208
+ sympy==1.13.3
209
+ # via torch
210
+ tokenizers==0.21.1
211
+ # via transformers
212
+ tomlkit==0.13.2
213
+ # via gradio
214
+ torch==2.4.0
215
+ # via
216
+ # sarashina2-vision-14b (pyproject.toml)
217
+ # accelerate
218
+ # torchvision
219
+ torchvision==0.19.0
220
+ # via sarashina2-vision-14b (pyproject.toml)
221
+ tqdm==4.67.1
222
+ # via
223
+ # huggingface-hub
224
+ # transformers
225
+ transformers==4.47.0
226
+ # via sarashina2-vision-14b (pyproject.toml)
227
+ triton==3.0.0
228
+ # via torch
229
+ typer==0.15.2
230
+ # via gradio
231
+ typing-extensions==4.12.2
232
+ # via
233
+ # anyio
234
+ # fastapi
235
+ # gradio
236
+ # gradio-client
237
+ # huggingface-hub
238
+ # pydantic
239
+ # pydantic-core
240
+ # rich
241
+ # spaces
242
+ # torch
243
+ # typer
244
+ # uvicorn
245
+ tzdata==2025.1
246
+ # via pandas
247
+ urllib3==2.3.0
248
+ # via requests
249
+ uvicorn==0.34.0
250
+ # via gradio
251
+ websockets==15.0.1
252
+ # via gradio-client
style.css ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
/* Center the Space title, which Gradio renders as the page's <h1>. */
h1 {
  text-align: center;
  display: block;
}
uv.lock ADDED
The diff for this file is too large to render. See raw diff