lzyhn committed on
Commit dfe74d0 · verified · 1 Parent(s): fba29f7

Add 2 files

Files changed (2)
  1. README.md +7 -5
  2. index.html +669 -19
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Deepseek
3
- emoji: 😻
4
- colorFrom: blue
5
- colorTo: purple
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: deepseek
3
+ emoji: 🐳
4
+ colorFrom: pink
5
+ colorTo: gray
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,669 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
1
+ <!DOCTYPE html>
2
+ <html lang="zh-CN">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>DeepSeek V3 0324 部署指南</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <style>
10
+ .code-block {
11
+ background-color: #2d2d2d;
12
+ color: #f8f8f2;
13
+ padding: 1rem;
14
+ border-radius: 0.5rem;
15
+ font-family: 'Courier New', Courier, monospace;
16
+ overflow-x: auto;
17
+ margin: 1rem 0;
18
+ position: relative;
19
+ }
20
+ .copy-btn {
21
+ position: absolute;
22
+ right: 0.5rem;
23
+ top: 0.5rem;
24
+ background-color: #4a5568;
25
+ color: white;
26
+ border: none;
27
+ border-radius: 0.25rem;
28
+ padding: 0.25rem 0.5rem;
29
+ cursor: pointer;
30
+ font-size: 0.75rem;
31
+ }
32
+ .copy-btn:hover {
33
+ background-color: #2d3748;
34
+ }
35
+ .note {
36
+ background-color: #e3f2fd;
37
+ border-left: 4px solid #2196f3;
38
+ padding: 1rem;
39
+ margin: 1rem 0;
40
+ border-radius: 0 0.5rem 0.5rem 0;
41
+ }
42
+ .warning {
43
+ background-color: #fff8e1;
44
+ border-left: 4px solid #ffc107;
45
+ padding: 1rem;
46
+ margin: 1rem 0;
47
+ border-radius: 0 0.5rem 0.5rem 0;
48
+ }
49
+ .success {
50
+ background-color: #e8f5e9;
51
+ border-left: 4px solid #4caf50;
52
+ padding: 1rem;
53
+ margin: 1rem 0;
54
+ border-radius: 0 0.5rem 0.5rem 0;
55
+ }
56
+ .hardware-req {
57
+ background-color: #f5f5f5;
58
+ border-radius: 0.5rem;
59
+ padding: 1rem;
60
+ margin: 1rem 0;
61
+ }
62
+ .tab-content {
63
+ display: none;
64
+ }
65
+ .tab-content.active {
66
+ display: block;
67
+ }
68
+ .tab-button {
69
+ background-color: #f1f1f1;
70
+ border: none;
71
+ padding: 10px 16px;
72
+ cursor: pointer;
73
+ transition: 0.3s;
74
+ border-radius: 5px 5px 0 0;
75
+ margin-right: 5px;
76
+ }
77
+ .tab-button:hover {
78
+ background-color: #ddd;
79
+ }
80
+ .tab-button.active {
81
+ background-color: #4CAF50;
82
+ color: white;
83
+ }
84
+ </style>
85
+ </head>
86
+ <body class="bg-gray-50 text-gray-800">
87
+ <div class="container mx-auto px-4 py-8 max-w-5xl">
88
+ <header class="mb-8 text-center">
89
+ <h1 class="text-4xl font-bold text-green-700 mb-2">DeepSeek V3 0324 部署指南</h1>
90
+ <p class="text-xl text-gray-600">NVIDIA A6000 + Dell T7910 内网工作站</p>
91
+ <div class="flex justify-center mt-4">
92
+ <span class="bg-green-100 text-green-800 text-sm font-medium px-2.5 py-0.5 rounded mr-2">Ktransformers</span>
93
+ <span class="bg-blue-100 text-blue-800 text-sm font-medium px-2.5 py-0.5 rounded mr-2">Unsloth</span>
94
+ <span class="bg-purple-100 text-purple-800 text-sm font-medium px-2.5 py-0.5 rounded">GGUF量化模型</span>
95
+ </div>
96
+ </header>
97
+
98
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
99
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
100
+ <i class="fas fa-info-circle text-blue-500 mr-2"></i>部署方案概述
101
+ </h2>
102
+ <p class="mb-4">本指南详细介绍了在NVIDIA A6000显卡的Dell T7910内网工作站上部署DeepSeek V3 0324大语言模型的完整流程,采用Ktransformers+Unsloth联合部署方案。</p>
103
+
104
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-6">
105
+ <div class="bg-gray-50 p-4 rounded-lg">
106
+ <h3 class="font-medium text-lg mb-2 text-green-700"><i class="fas fa-laptop-code mr-2"></i>外网准备阶段</h3>
107
+ <ul class="list-disc pl-5 space-y-1">
108
+ <li>在可访问外网的Windows电脑上使用WSL</li>
109
+ <li>完成所有依赖项的安装</li>
110
+ <li>下载模型文件和配置</li>
111
+ </ul>
112
+ </div>
113
+ <div class="bg-gray-50 p-4 rounded-lg">
114
+ <h3 class="font-medium text-lg mb-2 text-blue-700"><i class="fas fa-server mr-2"></i>内网部署阶段</h3>
115
+ <ul class="list-disc pl-5 space-y-1">
116
+ <li>将完整环境复制到固态硬盘</li>
117
+ <li>插入内网工作站启动</li>
118
+ <li>验证模型运行</li>
119
+ </ul>
120
+ </div>
121
+ </div>
122
+
123
+ <div class="note">
124
+ <i class="fas fa-lightbulb text-yellow-500 mr-2"></i>
125
+ <strong>提示:</strong> 本方案特别适合中国网络环境,尽可能使用国内下载源加速部署过程。
126
+ </div>
127
+ </div>
128
+
129
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
130
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
131
+ <i class="fas fa-download text-purple-500 mr-2"></i>准备工作
132
+ </h2>
133
+
134
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">1. 系统要求</h3>
135
+ <div class="hardware-req">
136
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
137
+ <div>
138
+ <h4 class="font-medium mb-2"><i class="fas fa-desktop mr-2"></i>硬件配置</h4>
139
+ <ul class="list-disc pl-5 space-y-1">
140
+ <li>NVIDIA A6000显卡 (48GB显存)</li>
141
+ <li>Dell Precision T7910工作站</li>
142
+ <li>至少64GB系统内存</li>
143
+ <li>高速固态硬盘(建议NVMe)</li>
144
+ </ul>
145
+ </div>
146
+ <div>
147
+ <h4 class="font-medium mb-2"><i class="fas fa-cog mr-2"></i>软件环境</h4>
148
+ <ul class="list-disc pl-5 space-y-1">
149
+ <li>Windows 10/11 with WSL2</li>
150
+ <li>Ubuntu 20.04/22.04 LTS (WSL)</li>
151
+ <li>Python 3.10+</li>
152
+ <li>CUDA 11.8/12.1</li>
153
+ </ul>
154
+ </div>
155
+ </div>
156
+ </div>
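+
+ <p class="mb-4">正式开始前,可以用下面的小脚本粗略确认系统内存和磁盘剩余空间是否满足上述要求(示意代码,属于补充检查,在WSL的Python 3中运行):</p>
+
+ <div class="code-block">
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
+ <code># 检查物理内存与home分区剩余空间 (仅适用于Linux/WSL)
+ import os
+ import shutil
+
+ mem_gib = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / 1024**3
+ disk = shutil.disk_usage(os.path.expanduser("~"))
+ print(f"物理内存: {mem_gib:.0f} GiB")
+ print(f"home分区剩余空间: {disk.free / 1024**3:.0f} GiB")</code>
+ </div>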
157
+
158
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">2. 模型选择</h3>
159
+ <p class="mb-4">我们将使用ModelScope上的Unsloth提供的DeepSeek-V3-0324 GGUF量化模型,以下是可选模型及其硬件需求:</p>
160
+
161
+ <div class="overflow-x-auto">
162
+ <table class="min-w-full bg-white border border-gray-200">
163
+ <thead class="bg-gray-100">
164
+ <tr>
165
+ <th class="py-2 px-4 border-b">模型名称</th>
166
+ <th class="py-2 px-4 border-b">量化级别</th>
167
+ <th class="py-2 px-4 border-b">显存需求</th>
168
+ <th class="py-2 px-4 border-b">内存需求</th>
169
+ <th class="py-2 px-4 border-b">适用场景</th>
170
+ </tr>
171
+ </thead>
172
+ <tbody>
173
+ <tr>
174
+ <td class="py-2 px-4 border-b">deepseek-v3-0324-Q2_K.gguf</td>
175
+ <td class="py-2 px-4 border-b">Q2_K (极低精度)</td>
176
+ <td class="py-2 px-4 border-b">~12GB</td>
177
+ <td class="py-2 px-4 border-b">32GB+</td>
178
+ <td class="py-2 px-4 border-b">快速推理,低资源</td>
179
+ </tr>
180
+ <tr class="bg-gray-50">
181
+ <td class="py-2 px-4 border-b">deepseek-v3-0324-Q4_K_M.gguf</td>
182
+ <td class="py-2 px-4 border-b">Q4_K_M (中等精度)</td>
183
+ <td class="py-2 px-4 border-b">~18GB</td>
184
+ <td class="py-2 px-4 border-b">48GB+</td>
185
+ <td class="py-2 px-4 border-b">平衡精度与速度</td>
186
+ </tr>
187
+ <tr>
188
+ <td class="py-2 px-4 border-b">deepseek-v3-0324-Q5_K_M.gguf</td>
189
+ <td class="py-2 px-4 border-b">Q5_K_M (较高精度)</td>
190
+ <td class="py-2 px-4 border-b">~22GB</td>
191
+ <td class="py-2 px-4 border-b">64GB+</td>
192
+ <td class="py-2 px-4 border-b">高质量推理</td>
193
+ </tr>
194
+ <tr class="bg-gray-50">
195
+ <td class="py-2 px-4 border-b">deepseek-v3-0324-Q6_K.gguf</td>
196
+ <td class="py-2 px-4 border-b">Q6_K (高精度)</td>
197
+ <td class="py-2 px-4 border-b">~26GB</td>
198
+ <td class="py-2 px-4 border-b">64GB+</td>
199
+ <td class="py-2 px-4 border-b">最高质量推理</td>
200
+ </tr>
201
+ </tbody>
202
+ </table>
203
+ </div>
204
+
205
+ <div class="note mt-4">
206
+ <i class="fas fa-lightbulb text-yellow-500 mr-2"></i>
207
+ <strong>建议:</strong> 对于NVIDIA A6000显卡(48GB显存),推荐使用Q5_K_M或Q6_K量化级别的模型,以获得最佳性能与质量的平衡。
208
+ </div>
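+
+ <p class="mb-4">选择量化级别前,可以先用下面的小脚本确认当前空闲显存(示意代码,假设已安装带CUDA支持的PyTorch):</p>
+
+ <div class="code-block">
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
+ <code># 查询GPU空闲/总显存,用于判断可选的量化级别
+ import torch
+
+ free, total = torch.cuda.mem_get_info(0)  # 返回字节数
+ print(f"空闲显存: {free/1024**3:.1f} GiB / 总显存: {total/1024**3:.1f} GiB")</code>
+ </div>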
209
+ </div>
210
+
211
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
212
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
213
+ <i class="fas fa-terminal text-green-500 mr-2"></i>外网环境部署 (WSL)
214
+ </h2>
215
+
216
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">1. 设置WSL环境</h3>
217
+ <p class="mb-4">在Windows电脑上启用WSL并安装Ubuntu:</p>
218
+
219
+ <div class="code-block">
220
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
221
+ <code># 以管理员身份打开PowerShell
222
+ wsl --install -d Ubuntu-22.04
223
+ wsl --set-version Ubuntu-22.04 2
224
+ wsl -d Ubuntu-22.04</code>
225
+ </div>
226
+
227
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">2. 配置Ubuntu环境</h3>
228
+ <p class="mb-4">在WSL的Ubuntu中执行以下命令:</p>
229
+
230
+ <div class="code-block">
231
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
232
+ <code># 先配置国内源 (阿里云),再更新系统并安装基础工具
+ sudo sed -i 's|http://archive.ubuntu.com|https://mirrors.aliyun.com|g' /etc/apt/sources.list
+ sudo sed -i 's|http://security.ubuntu.com|https://mirrors.aliyun.com|g' /etc/apt/sources.list
+
+ sudo apt update && sudo apt upgrade -y
+ sudo apt install -y build-essential cmake git wget python3-pip python3-venv
+
240
+ # 安装CUDA工具包 (使用国内源)
241
+ wget https://developer.download.nvidia.cn/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin
242
+ sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600
243
+ wget https://developer.download.nvidia.cn/compute/cuda/12.1.1/local_installers/cuda-repo-wsl-ubuntu-12-1-local_12.1.1-1_amd64.deb
244
+ sudo dpkg -i cuda-repo-wsl-ubuntu-12-1-local_12.1.1-1_amd64.deb
245
+ sudo cp /var/cuda-repo-wsl-ubuntu-12-1-local/cuda-*-keyring.gpg /usr/share/keyrings/
246
+ sudo apt-get update
247
+ sudo apt-get -y install cuda
248
+
249
+ # 验证CUDA安装
250
+ nvidia-smi
251
+ nvcc --version</code>
252
+ </div>
253
+
254
+ <div class="note mt-4">
255
+ <i class="fas fa-lightbulb text-yellow-500 mr-2"></i>
256
+ <strong>注意:</strong> 如果遇到网络问题,可以尝试使用清华源或中科大源替换阿里云源。
257
+ </div>
258
+
259
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">3. 创建Python虚拟环境</h3>
260
+
261
+ <div class="code-block">
262
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
263
+ <code># 创建虚拟环境
264
+ python3 -m venv deepseek-env
265
+ source deepseek-env/bin/activate
266
+
267
+ # 配置pip国内源
268
+ pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
269
+ pip install --upgrade pip
270
+
271
+ # 安装基础依赖
272
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
273
+ pip install transformers accelerate sentencepiece ninja</code>
274
+ </div>
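+
+ <p class="mb-4">依赖安装完成后,建议先做一次快速自检,确认PyTorch能识别到A6000(示意脚本,文件名可自行命名):</p>
+
+ <div class="code-block">
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
+ <code># 验证PyTorch与CUDA环境
+ import torch
+
+ print("PyTorch版本:", torch.__version__)
+ print("CUDA版本:", torch.version.cuda)
+ print("CUDA可用:", torch.cuda.is_available())
+ if torch.cuda.is_available():
+     print("GPU型号:", torch.cuda.get_device_name(0))</code>
+ </div>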
275
+
276
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">4. 安装Ktransformers和Unsloth</h3>
277
+
278
+ <div class="code-block">
279
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
280
+ <code># 安装Ktransformers
281
+ pip install ktransformers
282
+
283
+ # 安装Unsloth (使用国内Git镜像)
284
+ git clone https://gitee.com/mirrors/unsloth.git
285
+ cd unsloth
286
+ pip install -e .
287
+
288
+ # 或者直接从PyPI安装 (可能较慢)
289
+ # pip install unsloth</code>
290
+ </div>
291
+
292
+ <div class="warning mt-4">
293
+ <i class="fas fa-exclamation-triangle text-orange-500 mr-2"></i>
294
+ <strong>重要:</strong> 如果直接从PyPI安装速度过慢,建议使用Git镜像源克隆仓库后本地安装。
295
+ </div>
296
+
297
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">5. 下载DeepSeek V3 0324模型</h3>
298
+ <p class="mb-4">从ModelScope下载GGUF量化模型:</p>
299
+
300
+ <div class="code-block">
301
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
302
+ <code># 创建模型目录
303
+ mkdir -p ~/models/deepseek-v3-0324
304
+ cd ~/models/deepseek-v3-0324
305
+
306
+ # 使用国内镜像下载模型 (以Q5_K_M为例; URL中含&,必须用引号包裹)
+ wget "https://modelscope.cn/api/v1/models/unsloth/DeepSeek-V3-0324-GGUF/repo?Revision=master&FilePath=deepseek-v3-0324-Q5_K_M.gguf" -O deepseek-v3-0324-Q5_K_M.gguf
+
+ # 可选: 下载其他量化级别的模型
+ wget "https://modelscope.cn/api/v1/models/unsloth/DeepSeek-V3-0324-GGUF/repo?Revision=master&FilePath=deepseek-v3-0324-Q4_K_M.gguf" -O deepseek-v3-0324-Q4_K_M.gguf
+ wget "https://modelscope.cn/api/v1/models/unsloth/DeepSeek-V3-0324-GGUF/repo?Revision=master&FilePath=deepseek-v3-0324-Q6_K.gguf" -O deepseek-v3-0324-Q6_K.gguf</code>
312
+ </div>
313
+
314
+ <div class="note mt-4">
315
+ <i class="fas fa-lightbulb text-yellow-500 mr-2"></i>
316
+ <strong>提示:</strong> 模型文件较大(10GB+),请确保有足够的磁盘空间。下载完成后可以验证文件的MD5/SHA256校验和。
317
+ </div>
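+
+ <p class="mb-4">校验文件完整性可以参考下面的示例脚本(示意代码,校验值请以模型页面公布的为准):</p>
+
+ <div class="code-block">
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
+ <code># 计算GGUF文件的SHA256校验和
+ import hashlib
+ import sys
+
+ def sha256sum(path, chunk_size=1024 * 1024):
+     h = hashlib.sha256()
+     with open(path, "rb") as f:
+         for block in iter(lambda: f.read(chunk_size), b""):
+             h.update(block)
+     return h.hexdigest()
+
+ print(sha256sum(sys.argv[1]))  # 用法: python sha256sum.py deepseek-v3-0324-Q5_K_M.gguf</code>
+ </div>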
318
+ </div>
319
+
320
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
321
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
322
+ <i class="fas fa-cogs text-blue-500 mr-2"></i>模型配置与测试
323
+ </h2>
324
+
325
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">1. 创建测试脚本</h3>
326
+ <p class="mb-4">创建一个Python脚本测试模型是否能正常运行:</p>
327
+
328
+ <div class="code-block">
329
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
330
+ <code># 创建测试脚本
331
+ cat &lt;&lt; 'EOF' &gt; test_deepseek.py
332
+ from unsloth import FastLanguageModel
333
+ import torch
334
+
335
+ model_path = "/home/yourusername/models/deepseek-v3-0324/deepseek-v3-0324-Q5_K_M.gguf"
336
+ model, tokenizer = FastLanguageModel.from_pretrained(model_path)
337
+
338
+ # 配置模型参数
339
+ FastLanguageModel.for_inference(model)
340
+ model.config.use_cache = True
341
+ model.config.max_seq_length = 4096 # 根据显存调整
342
+
343
+ # 测试推理
344
+ inputs = tokenizer("你好,DeepSeek V3!", return_tensors="pt").to("cuda")
345
+ outputs = model.generate(**inputs, max_new_tokens=64)
346
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
347
+ EOF
348
+
349
+ # 运行测试脚本
350
+ python test_deepseek.py</code>
351
+ </div>
352
+
353
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">2. 优化配置参数</h3>
354
+ <p class="mb-4">根据NVIDIA A6000显卡的48GB显存,以下是推荐的配置参数:</p>
355
+
356
+ <div class="tabs mb-4">
357
+ <button class="tab-button active" onclick="openTab(event, 'q5-tab')">Q5_K_M 配置</button>
358
+ <button class="tab-button" onclick="openTab(event, 'q6-tab')">Q6_K 配置</button>
359
+ <button class="tab-button" onclick="openTab(event, 'q4-tab')">Q4_K_M 配置</button>
360
+ </div>
361
+
362
+ <div id="q5-tab" class="tab-content active">
363
+ <div class="code-block">
364
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
365
+ <code># Q5_K_M 量化模型配置 (推荐)
366
+ model_config = {
367
+ "model_path": "/path/to/deepseek-v3-0324-Q5_K_M.gguf",
368
+ "n_gpu_layers": 40, # 使用尽可能多的GPU层
369
+ "n_ctx": 4096, # 上下文长度
370
+ "n_batch": 512, # 批处理大小
371
+ "n_threads": 12, # CPU线程数(根据CPU核心数调整)
372
+ "max_new_tokens": 1024,
373
+ "temperature": 0.7,
374
+ "top_p": 0.9,
375
+ "repetition_penalty": 1.1,
376
+ }</code>
377
+ </div>
378
+ </div>
379
+
380
+ <div id="q6-tab" class="tab-content">
381
+ <div class="code-block">
382
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
383
+ <code># Q6_K 量化模型配置 (高质量)
384
+ model_config = {
385
+ "model_path": "/path/to/deepseek-v3-0324-Q6_K.gguf",
386
+ "n_gpu_layers": 35, # 减少GPU层数以适应更大模型
387
+ "n_ctx": 4096,
388
+ "n_batch": 384, # 减小批处理大小
389
+ "n_threads": 12,
390
+ "max_new_tokens": 768,
391
+ "temperature": 0.7,
392
+ "top_p": 0.9,
393
+ "repetition_penalty": 1.1,
394
+ }</code>
395
+ </div>
396
+ </div>
397
+
398
+ <div id="q4-tab" class="tab-content">
399
+ <div class="code-block">
400
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
401
+ <code># Q4_K_M 量化模型配置 (高性能)
402
+ model_config = {
403
+ "model_path": "/path/to/deepseek-v3-0324-Q4_K_M.gguf",
404
+ "n_gpu_layers": 45, # 可以使用更多GPU层
405
+ "n_ctx": 4096,
406
+ "n_batch": 768, # 增大批处理大小
407
+ "n_threads": 12,
408
+ "max_new_tokens": 2048,
409
+ "temperature": 0.7,
410
+ "top_p": 0.9,
411
+ "repetition_penalty": 1.1,
412
+ }</code>
413
+ </div>
414
+ </div>
415
+
416
+ <div class="success mt-4">
417
+ <i class="fas fa-check-circle text-green-500 mr-2"></i>
418
+ <strong>验证:</strong> 如果测试脚本能正常运行并生成文本输出,说明模型已正确加载并可以使用。
419
+ </div>
420
+ </div>
421
+
422
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
423
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
424
+ <i class="fas fa-exchange-alt text-purple-500 mr-2"></i>迁移到内网工作站
425
+ </h2>
426
+
427
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">1. 准备迁移内容</h3>
428
+ <p class="mb-4">将以下内容复制到固态硬盘:</p>
429
+ <ul class="list-disc pl-5 space-y-1 mb-4">
430
+ <li>完整的WSL Ubuntu系统 (导出为tar文件)</li>
431
+ <li>模型文件 (~/models/deepseek-v3-0324/)</li>
432
+ <li>Python虚拟环境 (~/deepseek-env/)</li>
433
+ <li>测试脚本和配置文件</li>
434
+ </ul>
435
+
436
+ <div class="code-block">
437
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
438
+ <code># 在Windows PowerShell中导出Ubuntu系统
+ wsl --export Ubuntu-22.04 ubuntu-22.04-deepseek.tar
+
+ # 在WSL的Ubuntu中复制模型文件和虚拟环境 (此处假设固态硬盘挂载为E盘)
442
+ cp -r ~/models /mnt/e/deepseek-deploy/
443
+ cp -r ~/deepseek-env /mnt/e/deepseek-deploy/
444
+ cp test_deepseek.py /mnt/e/deepseek-deploy/</code>
445
+ </div>
446
+
447
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">2. 在内网工作站上设置</h3>
448
+ <p class="mb-4">将固态硬盘插入内网工作站后执行以下步骤:</p>
449
+
450
+ <div class="code-block">
451
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
452
+ <code># 1. 安装WSL (如果尚未安装)
453
+ wsl --install
454
+
455
+ # 2. 导入Ubuntu系统
456
+ wsl --import Ubuntu-22.04-deepseek C:\WSL\Ubuntu-22.04-deepseek E:\ubuntu-22.04-deepseek.tar
457
+
458
+ # 3. 设置默认用户 (导入的发行版没有独立启动器,需写入 /etc/wsl.conf,替换yourusername)
+ wsl -d Ubuntu-22.04-deepseek -u root sh -c "printf '[user]\ndefault=yourusername\n' >> /etc/wsl.conf"
+ wsl --terminate Ubuntu-22.04-deepseek
460
+
461
+ # 4. 启动WSL
462
+ wsl -d Ubuntu-22.04-deepseek</code>
463
+ </div>
464
+
465
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">3. 验证内网环境</h3>
466
+ <p class="mb-4">在内网工作站的WSL中验证环境:</p>
467
+
468
+ <div class="code-block">
469
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
470
+ <code># 激活虚拟环境 (venv不可跨路径迁移,应使用随系统tar一并导入的 ~/deepseek-env)
+ source ~/deepseek-env/bin/activate
472
+
473
+ # 验证CUDA
474
+ nvidia-smi
475
+ nvcc --version
476
+
477
+ # 运行测试脚本
478
+ python /mnt/e/deepseek-deploy/test_deepseek.py</code>
479
+ </div>
480
+
481
+ <div class="warning mt-4">
482
+ <i class="fas fa-exclamation-triangle text-orange-500 mr-2"></i>
483
+ <strong>注意:</strong> 确保内网工作站已安装相同或兼容版本的NVIDIA驱动和CUDA工具包。
484
+ </div>
485
+ </div>
486
+
487
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
488
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
489
+ <i class="fas fa-rocket text-red-500 mr-2"></i>高级配置与优化
490
+ </h2>
491
+
492
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">1. 使用Ktransformers加速</h3>
493
+ <p class="mb-4">结合Ktransformers可以进一步提高推理速度:</p>
494
+
495
+ <div class="code-block">
496
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
497
+ <code>from ktransformers import AutoModelForCausalLM
498
+ from unsloth import FastLanguageModel
499
+
500
+ # 加载模型
501
+ model_path = "/path/to/deepseek-v3-0324-Q5_K_M.gguf"
502
+ model, tokenizer = FastLanguageModel.from_pretrained(model_path)
503
+
504
+ # 转换为Ktransformers格式
505
+ kmodel = AutoModelForCausalLM.from_pretrained(model, device_map="auto")
506
+
507
+ # 配置生成参数
508
+ generation_config = {
509
+ "max_new_tokens": 1024,
510
+ "do_sample": True,
511
+ "temperature": 0.7,
512
+ "top_p": 0.9,
513
+ }
514
+
515
+ # 推理示例
516
+ inputs = tokenizer("中国的首都是", return_tensors="pt").to("cuda")
517
+ outputs = kmodel.generate(**inputs, **generation_config)
518
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))</code>
519
+ </div>
520
+
521
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">2. 批处理推理优化</h3>
522
+ <p class="mb-4">利用A6000的大显存进行批处理推理:</p>
523
+
524
+ <div class="code-block">
525
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
526
+ <code>def batch_inference(queries, model, tokenizer, batch_size=4):
+     # 编码所有查询
+     inputs = tokenizer(queries, return_tensors="pt", padding=True, truncation=True).to("cuda")
+
+     # 分批处理
+     outputs = []
+     for i in range(0, len(queries), batch_size):
+         batch = {k: v[i:i+batch_size] for k, v in inputs.items()}
+         batch_outputs = model.generate(**batch, max_new_tokens=256)
+         outputs.extend(tokenizer.batch_decode(batch_outputs, skip_special_tokens=True))
+
+     return outputs
+
+ # 示例使用
+ queries = [
+     "解释人工智能的基本概念",
+     "写一首关于春天的诗",
+     "Python中如何实现快速排序?",
+     "中国的四大发明是什么?"
+ ]
+ results = batch_inference(queries, kmodel, tokenizer)
+ for q, r in zip(queries, results):
+     print(f"Q: {q}\nA: {r}\n{'='*50}")</code>
549
+ </div>
550
+
551
+ <h3 class="text-xl font-medium mt-6 mb-3 text-gray-700">3. 性能监控脚本</h3>
552
+ <p class="mb-4">监控GPU使用情况和推理速度:</p>
553
+
554
+ <div class="code-block">
555
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
556
+ <code>import torch
+ from pynvml import *
+
+ def print_gpu_utilization():
+     nvmlInit()
+     handle = nvmlDeviceGetHandleByIndex(0)
+     info = nvmlDeviceGetMemoryInfo(handle)
+     print(f"GPU内存使用: {info.used//1024**2}MB / {info.total//1024**2}MB")
+     print(f"GPU利用率: {nvmlDeviceGetUtilizationRates(handle).gpu}%")
+
+ # 在推理前后调用
+ print_gpu_utilization()
+ inputs = tokenizer("监控GPU使用情况", return_tensors="pt").to("cuda")
+ outputs = model.generate(**inputs, max_new_tokens=100)
+ print_gpu_utilization()</code>
571
+ </div>
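+
+ <p class="mb-4">除了显存占用,也可以粗略测一下生成吞吐(示意代码,假设model和tokenizer已按前文方式加载):</p>
+
+ <div class="code-block">
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
+ <code># 粗略测量生成速度 (tokens/s)
+ import time
+
+ inputs = tokenizer("简要介绍一下DeepSeek V3的特点", return_tensors="pt").to("cuda")
+ start = time.time()
+ outputs = model.generate(**inputs, max_new_tokens=128)
+ elapsed = time.time() - start
+
+ new_tokens = outputs.shape[-1] - inputs["input_ids"].shape[-1]
+ print(f"生成 {new_tokens} 个token,耗时 {elapsed:.2f} 秒,约 {new_tokens/elapsed:.1f} tokens/s")</code>
+ </div>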
572
+ </div>
573
+
574
+ <div class="bg-white rounded-lg shadow-md p-6 mb-8">
575
+ <h2 class="text-2xl font-semibold mb-4 text-gray-800 border-b pb-2">
576
+ <i class="fas fa-question-circle text-indigo-500 mr-2"></i>常见问题解决
577
+ </h2>
578
+
579
+ <div class="space-y-4">
580
+ <div class="border-l-4 border-blue-500 pl-4 py-2">
581
+ <h3 class="font-medium">1. CUDA out of memory 错误</h3>
582
+ <p class="text-sm text-gray-600">解决方案:减少<code>n_gpu_layers</code>、降低<code>n_ctx</code>或<code>n_batch</code>,或使用更低量化的模型。</p>
583
+ </div>
584
+
585
+ <div class="border-l-4 border-blue-500 pl-4 py-2">
586
+ <h3 class="font-medium">2. 模型加载缓慢</h3>
587
+ <p class="text-sm text-gray-600">解决方案:确保模型文件在SSD上,增加<code>n_threads</code>参数使用更多CPU核心。</p>
588
+ </div>
589
+
590
+ <div class="border-l-4 border-blue-500 pl-4 py-2">
591
+ <h3 class="font-medium">3. 推理速度不理想</h3>
592
+ <p class="text-sm text-gray-600">解决方案:尝试使用Ktransformers,启用<code>use_cache</code>,并确保足够多的层在GPU上运行。</p>
593
+ </div>
594
+
595
+ <div class="border-l-4 border-blue-500 pl-4 py-2">
596
+ <h3 class="font-medium">4. 迁移后模型无法运行</h3>
597
+ <p class="text-sm text-gray-600">解决方案:检查CUDA版本兼容性,确保内网工作站安装了正确的NVIDIA驱动。</p>
598
+ </div>
599
+
600
+ <div class="border-l-4 border-blue-500 pl-4 py-2">
601
+ <h3 class="font-medium">5. 中文输出质量不佳</h3>
602
+ <p class="text-sm text-gray-600">解决方案:调整<code>temperature</code>和<code>top_p</code>参数,或使用更高量化的模型。</p>
603
+ </div>
604
+ </div>
605
+ </div>
606
+
607
+ <div class="bg-green-50 rounded-lg shadow-md p-6 mb-8 border border-green-200">
608
+ <h2 class="text-2xl font-semibold mb-4 text-green-800 border-b pb-2">
609
+ <i class="fas fa-check-circle text-green-500 mr-2"></i>部署完成
610
+ </h2>
611
+ <p class="mb-4">恭喜!您已成功在NVIDIA A6000显卡的Dell T7910内网工作站上部署了DeepSeek V3 0324模型。</p>
612
+
613
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
614
+ <div class="bg-white p-4 rounded-lg border border-green-100">
615
+ <h3 class="font-medium text-lg mb-2 text-green-700"><i class="fas fa-lightbulb mr-2"></i>下一步建议</h3>
616
+ <ul class="list-disc pl-5 space-y-1">
617
+ <li>创建API服务供其他应用调用(见下方示例)</li>
618
+ <li>开发自定义前端界面</li>
619
+ <li>针对特定任务进行微调</li>
620
+ </ul>
621
+ </div>
622
+ <div class="bg-white p-4 rounded-lg border border-green-100">
623
+ <h3 class="font-medium text-lg mb-2 text-green-700"><i class="fas fa-book mr-2"></i>学习资源</h3>
624
+ <ul class="list-disc pl-5 space-y-1">
625
+ <li>DeepSeek官方文档</li>
626
+ <li>Unsloth GitHub仓库</li>
627
+ <li>Ktransformers使用指南</li>
628
+ </ul>
629
+ </div>
630
+ </div>
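+
+ <p class="mb-4 mt-4">如果要把模型包装成内网API服务,下面是一个最小化的FastAPI示例(示意代码,假设model和tokenizer已按前文方式加载,接口路径与参数均为自拟):</p>
+
+ <div class="code-block">
+ <button class="copy-btn" onclick="copyCode(this)">复制</button>
+ <code># api_server.py - 最小化推理API示例
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+
+ app = FastAPI()
+
+ class Query(BaseModel):
+     prompt: str
+     max_new_tokens: int = 256
+
+ @app.post("/generate")
+ def generate(query: Query):
+     inputs = tokenizer(query.prompt, return_tensors="pt").to("cuda")
+     outputs = model.generate(**inputs, max_new_tokens=query.max_new_tokens)
+     return {"text": tokenizer.decode(outputs[0], skip_special_tokens=True)}
+
+ # 启动: pip install fastapi uvicorn 后执行
+ # uvicorn api_server:app --host 0.0.0.0 --port 8000</code>
+ </div>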
631
+ </div>
632
+
633
+ <footer class="text-center text-sm text-gray-500 mt-8">
634
+ <p>© 2025 DeepSeek V3 0324 部署指南 | 使用Ktransformers+Unsloth联合部署方案</p>
+ <p class="mt-2">最后更新: 2025年</p>
636
+ </footer>
637
+ </div>
638
+
639
+ <script>
640
+ // 复制代码功能
641
+ function copyCode(button) {
642
+ const codeBlock = button.parentElement;
643
+ const code = codeBlock.querySelector('code').textContent;
644
+ navigator.clipboard.writeText(code).then(() => {
645
+ button.textContent = '已复制!';
646
+ setTimeout(() => {
647
+ button.textContent = '复制';
648
+ }, 2000);
649
+ });
650
+ }
651
+
652
+ // 标签页功能
653
+ function openTab(evt, tabName) {
654
+ const tabContents = document.getElementsByClassName("tab-content");
655
+ for (let i = 0; i < tabContents.length; i++) {
656
+ tabContents[i].classList.remove("active");
657
+ }
658
+
659
+ const tabButtons = document.getElementsByClassName("tab-button");
660
+ for (let i = 0; i < tabButtons.length; i++) {
661
+ tabButtons[i].classList.remove("active");
662
+ }
663
+
664
+ document.getElementById(tabName).classList.add("active");
665
+ evt.currentTarget.classList.add("active");
666
+ }
667
+ </script>
668
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - <a href="https://enzostvs-deepsite.hf.space?remix=lzyhn/deepseek" style="color: #fff;text-decoration: underline;" target="_blank" >🧬 Remix</a></p></body>
669
+ </html>