ITHwangg committed on
Commit
cdf0c25
·
1 Parent(s): 515f4c9

add models

Browse files
Files changed (8) hide show
  1. README.md +1 -1
  2. build/m.d.ts +48 -0
  3. build/m.js +467 -0
  4. build/m_bg.wasm +3 -0
  5. build/m_bg.wasm.d.ts +16 -0
  6. index.html +357 -17
  7. qwenWorker.js +162 -0
  8. style.css +0 -28
README.md CHANGED
@@ -9,4 +9,4 @@ license: apache-2.0
9
  short_description: Qwen2.5-Instruct models on the browser with candle and wasm
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
9
  short_description: Qwen2.5-Instruct models on the browser with candle and wasm
10
  ---
11
 
12
+ - Check out the source code [here](https://github.com/ITHwang/llm-serving-wasm)
build/m.d.ts ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export class Model {
4
+ free(): void;
5
+ constructor(weights: Uint8Array, tokenizer: Uint8Array, config: Uint8Array, quantized: boolean);
6
+ init_with_prompt(prompt: string, temp: number, top_p: number, repeat_penalty: number, repeat_last_n: number, seed: bigint): string;
7
+ next_token(): string;
8
+ }
9
+
10
+ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
11
+
12
+ export interface InitOutput {
13
+ readonly memory: WebAssembly.Memory;
14
+ readonly __wbg_model_free: (a: number, b: number) => void;
15
+ readonly model_load: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => [number, number, number];
16
+ readonly model_init_with_prompt: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: bigint) => [number, number, number, number];
17
+ readonly model_next_token: (a: number) => [number, number, number, number];
18
+ readonly main: (a: number, b: number) => number;
19
+ readonly __wbindgen_exn_store: (a: number) => void;
20
+ readonly __externref_table_alloc: () => number;
21
+ readonly __wbindgen_export_2: WebAssembly.Table;
22
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
23
+ readonly __wbindgen_malloc: (a: number, b: number) => number;
24
+ readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
25
+ readonly __externref_table_dealloc: (a: number) => void;
26
+ readonly __wbindgen_start: () => void;
27
+ }
28
+
29
+ export type SyncInitInput = BufferSource | WebAssembly.Module;
30
+ /**
31
+ * Instantiates the given `module`, which can either be bytes or
32
+ * a precompiled `WebAssembly.Module`.
33
+ *
34
+ * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
35
+ *
36
+ * @returns {InitOutput}
37
+ */
38
+ export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
39
+
40
+ /**
41
+ * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
42
+ * for everything else, calls `WebAssembly.instantiate` directly.
43
+ *
44
+ * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
45
+ *
46
+ * @returns {Promise<InitOutput>}
47
+ */
48
+ export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
build/m.js ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ let wasm;
2
+
3
+ function addToExternrefTable0(obj) {
4
+ const idx = wasm.__externref_table_alloc();
5
+ wasm.__wbindgen_export_2.set(idx, obj);
6
+ return idx;
7
+ }
8
+
9
+ function handleError(f, args) {
10
+ try {
11
+ return f.apply(this, args);
12
+ } catch (e) {
13
+ const idx = addToExternrefTable0(e);
14
+ wasm.__wbindgen_exn_store(idx);
15
+ }
16
+ }
17
+
18
+ const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
19
+
20
+ if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
21
+
22
+ let cachedUint8ArrayMemory0 = null;
23
+
24
+ function getUint8ArrayMemory0() {
25
+ if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
26
+ cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
27
+ }
28
+ return cachedUint8ArrayMemory0;
29
+ }
30
+
31
+ function getStringFromWasm0(ptr, len) {
32
+ ptr = ptr >>> 0;
33
+ return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
34
+ }
35
+
36
+ let WASM_VECTOR_LEN = 0;
37
+
38
+ const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
39
+
40
+ const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
41
+ ? function (arg, view) {
42
+ return cachedTextEncoder.encodeInto(arg, view);
43
+ }
44
+ : function (arg, view) {
45
+ const buf = cachedTextEncoder.encode(arg);
46
+ view.set(buf);
47
+ return {
48
+ read: arg.length,
49
+ written: buf.length
50
+ };
51
+ });
52
+
53
+ function passStringToWasm0(arg, malloc, realloc) {
54
+
55
+ if (realloc === undefined) {
56
+ const buf = cachedTextEncoder.encode(arg);
57
+ const ptr = malloc(buf.length, 1) >>> 0;
58
+ getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
59
+ WASM_VECTOR_LEN = buf.length;
60
+ return ptr;
61
+ }
62
+
63
+ let len = arg.length;
64
+ let ptr = malloc(len, 1) >>> 0;
65
+
66
+ const mem = getUint8ArrayMemory0();
67
+
68
+ let offset = 0;
69
+
70
+ for (; offset < len; offset++) {
71
+ const code = arg.charCodeAt(offset);
72
+ if (code > 0x7F) break;
73
+ mem[ptr + offset] = code;
74
+ }
75
+
76
+ if (offset !== len) {
77
+ if (offset !== 0) {
78
+ arg = arg.slice(offset);
79
+ }
80
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
81
+ const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
82
+ const ret = encodeString(arg, view);
83
+
84
+ offset += ret.written;
85
+ ptr = realloc(ptr, len, offset, 1) >>> 0;
86
+ }
87
+
88
+ WASM_VECTOR_LEN = offset;
89
+ return ptr;
90
+ }
91
+
92
+ let cachedDataViewMemory0 = null;
93
+
94
+ function getDataViewMemory0() {
95
+ if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
96
+ cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
97
+ }
98
+ return cachedDataViewMemory0;
99
+ }
100
+
101
+ function isLikeNone(x) {
102
+ return x === undefined || x === null;
103
+ }
104
+
105
+ function passArray8ToWasm0(arg, malloc) {
106
+ const ptr = malloc(arg.length * 1, 1) >>> 0;
107
+ getUint8ArrayMemory0().set(arg, ptr / 1);
108
+ WASM_VECTOR_LEN = arg.length;
109
+ return ptr;
110
+ }
111
+
112
+ function takeFromExternrefTable0(idx) {
113
+ const value = wasm.__wbindgen_export_2.get(idx);
114
+ wasm.__externref_table_dealloc(idx);
115
+ return value;
116
+ }
117
+
118
+ const ModelFinalization = (typeof FinalizationRegistry === 'undefined')
119
+ ? { register: () => {}, unregister: () => {} }
120
+ : new FinalizationRegistry(ptr => wasm.__wbg_model_free(ptr >>> 0, 1));
121
+
122
+ export class Model {
123
+
124
+ __destroy_into_raw() {
125
+ const ptr = this.__wbg_ptr;
126
+ this.__wbg_ptr = 0;
127
+ ModelFinalization.unregister(this);
128
+ return ptr;
129
+ }
130
+
131
+ free() {
132
+ const ptr = this.__destroy_into_raw();
133
+ wasm.__wbg_model_free(ptr, 0);
134
+ }
135
+ /**
136
+ * @param {Uint8Array} weights
137
+ * @param {Uint8Array} tokenizer
138
+ * @param {Uint8Array} config
139
+ * @param {boolean} quantized
140
+ */
141
+ constructor(weights, tokenizer, config, quantized) {
142
+ const ptr0 = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
143
+ const len0 = WASM_VECTOR_LEN;
144
+ const ptr1 = passArray8ToWasm0(tokenizer, wasm.__wbindgen_malloc);
145
+ const len1 = WASM_VECTOR_LEN;
146
+ const ptr2 = passArray8ToWasm0(config, wasm.__wbindgen_malloc);
147
+ const len2 = WASM_VECTOR_LEN;
148
+ const ret = wasm.model_load(ptr0, len0, ptr1, len1, ptr2, len2, quantized);
149
+ if (ret[2]) {
150
+ throw takeFromExternrefTable0(ret[1]);
151
+ }
152
+ this.__wbg_ptr = ret[0] >>> 0;
153
+ ModelFinalization.register(this, this.__wbg_ptr, this);
154
+ return this;
155
+ }
156
+ /**
157
+ * @param {string} prompt
158
+ * @param {number} temp
159
+ * @param {number} top_p
160
+ * @param {number} repeat_penalty
161
+ * @param {number} repeat_last_n
162
+ * @param {bigint} seed
163
+ * @returns {string}
164
+ */
165
+ init_with_prompt(prompt, temp, top_p, repeat_penalty, repeat_last_n, seed) {
166
+ let deferred3_0;
167
+ let deferred3_1;
168
+ try {
169
+ const ptr0 = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
170
+ const len0 = WASM_VECTOR_LEN;
171
+ const ret = wasm.model_init_with_prompt(this.__wbg_ptr, ptr0, len0, temp, top_p, repeat_penalty, repeat_last_n, seed);
172
+ var ptr2 = ret[0];
173
+ var len2 = ret[1];
174
+ if (ret[3]) {
175
+ ptr2 = 0; len2 = 0;
176
+ throw takeFromExternrefTable0(ret[2]);
177
+ }
178
+ deferred3_0 = ptr2;
179
+ deferred3_1 = len2;
180
+ return getStringFromWasm0(ptr2, len2);
181
+ } finally {
182
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
183
+ }
184
+ }
185
+ /**
186
+ * @returns {string}
187
+ */
188
+ next_token() {
189
+ let deferred2_0;
190
+ let deferred2_1;
191
+ try {
192
+ const ret = wasm.model_next_token(this.__wbg_ptr);
193
+ var ptr1 = ret[0];
194
+ var len1 = ret[1];
195
+ if (ret[3]) {
196
+ ptr1 = 0; len1 = 0;
197
+ throw takeFromExternrefTable0(ret[2]);
198
+ }
199
+ deferred2_0 = ptr1;
200
+ deferred2_1 = len1;
201
+ return getStringFromWasm0(ptr1, len1);
202
+ } finally {
203
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
204
+ }
205
+ }
206
+ }
207
+
208
+ async function __wbg_load(module, imports) {
209
+ if (typeof Response === 'function' && module instanceof Response) {
210
+ if (typeof WebAssembly.instantiateStreaming === 'function') {
211
+ try {
212
+ return await WebAssembly.instantiateStreaming(module, imports);
213
+
214
+ } catch (e) {
215
+ if (module.headers.get('Content-Type') != 'application/wasm') {
216
+ console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
217
+
218
+ } else {
219
+ throw e;
220
+ }
221
+ }
222
+ }
223
+
224
+ const bytes = await module.arrayBuffer();
225
+ return await WebAssembly.instantiate(bytes, imports);
226
+
227
+ } else {
228
+ const instance = await WebAssembly.instantiate(module, imports);
229
+
230
+ if (instance instanceof WebAssembly.Instance) {
231
+ return { instance, module };
232
+
233
+ } else {
234
+ return instance;
235
+ }
236
+ }
237
+ }
238
+
239
+ function __wbg_get_imports() {
240
+ const imports = {};
241
+ imports.wbg = {};
242
+ imports.wbg.__wbg_buffer_609cc3eee51ed158 = function(arg0) {
243
+ const ret = arg0.buffer;
244
+ return ret;
245
+ };
246
+ imports.wbg.__wbg_call_672a4d21634d4a24 = function() { return handleError(function (arg0, arg1) {
247
+ const ret = arg0.call(arg1);
248
+ return ret;
249
+ }, arguments) };
250
+ imports.wbg.__wbg_call_7cccdd69e0791ae2 = function() { return handleError(function (arg0, arg1, arg2) {
251
+ const ret = arg0.call(arg1, arg2);
252
+ return ret;
253
+ }, arguments) };
254
+ imports.wbg.__wbg_crypto_ed58b8e10a292839 = function(arg0) {
255
+ const ret = arg0.crypto;
256
+ return ret;
257
+ };
258
+ imports.wbg.__wbg_error_7534b8e9a36f1ab4 = function(arg0, arg1) {
259
+ let deferred0_0;
260
+ let deferred0_1;
261
+ try {
262
+ deferred0_0 = arg0;
263
+ deferred0_1 = arg1;
264
+ console.error(getStringFromWasm0(arg0, arg1));
265
+ } finally {
266
+ wasm.__wbindgen_free(deferred0_0, deferred0_1, 1);
267
+ }
268
+ };
269
+ imports.wbg.__wbg_getRandomValues_bcb4912f16000dc4 = function() { return handleError(function (arg0, arg1) {
270
+ arg0.getRandomValues(arg1);
271
+ }, arguments) };
272
+ imports.wbg.__wbg_log_55f8292edd5cb819 = function(arg0, arg1) {
273
+ console.log(getStringFromWasm0(arg0, arg1));
274
+ };
275
+ imports.wbg.__wbg_msCrypto_0a36e2ec3a343d26 = function(arg0) {
276
+ const ret = arg0.msCrypto;
277
+ return ret;
278
+ };
279
+ imports.wbg.__wbg_new_8a6f238a6ece86ea = function() {
280
+ const ret = new Error();
281
+ return ret;
282
+ };
283
+ imports.wbg.__wbg_new_a12002a7f91c75be = function(arg0) {
284
+ const ret = new Uint8Array(arg0);
285
+ return ret;
286
+ };
287
+ imports.wbg.__wbg_newnoargs_105ed471475aaf50 = function(arg0, arg1) {
288
+ const ret = new Function(getStringFromWasm0(arg0, arg1));
289
+ return ret;
290
+ };
291
+ imports.wbg.__wbg_newwithbyteoffsetandlength_d97e637ebe145a9a = function(arg0, arg1, arg2) {
292
+ const ret = new Uint8Array(arg0, arg1 >>> 0, arg2 >>> 0);
293
+ return ret;
294
+ };
295
+ imports.wbg.__wbg_newwithlength_a381634e90c276d4 = function(arg0) {
296
+ const ret = new Uint8Array(arg0 >>> 0);
297
+ return ret;
298
+ };
299
+ imports.wbg.__wbg_node_02999533c4ea02e3 = function(arg0) {
300
+ const ret = arg0.node;
301
+ return ret;
302
+ };
303
+ imports.wbg.__wbg_now_807e54c39636c349 = function() {
304
+ const ret = Date.now();
305
+ return ret;
306
+ };
307
+ imports.wbg.__wbg_process_5c1d670bc53614b8 = function(arg0) {
308
+ const ret = arg0.process;
309
+ return ret;
310
+ };
311
+ imports.wbg.__wbg_randomFillSync_ab2cfe79ebbf2740 = function() { return handleError(function (arg0, arg1) {
312
+ arg0.randomFillSync(arg1);
313
+ }, arguments) };
314
+ imports.wbg.__wbg_require_79b1e9274cde3c87 = function() { return handleError(function () {
315
+ const ret = module.require;
316
+ return ret;
317
+ }, arguments) };
318
+ imports.wbg.__wbg_set_65595bdd868b3009 = function(arg0, arg1, arg2) {
319
+ arg0.set(arg1, arg2 >>> 0);
320
+ };
321
+ imports.wbg.__wbg_stack_0ed75d68575b0f3c = function(arg0, arg1) {
322
+ const ret = arg1.stack;
323
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
324
+ const len1 = WASM_VECTOR_LEN;
325
+ getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
326
+ getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
327
+ };
328
+ imports.wbg.__wbg_static_accessor_GLOBAL_88a902d13a557d07 = function() {
329
+ const ret = typeof global === 'undefined' ? null : global;
330
+ return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
331
+ };
332
+ imports.wbg.__wbg_static_accessor_GLOBAL_THIS_56578be7e9f832b0 = function() {
333
+ const ret = typeof globalThis === 'undefined' ? null : globalThis;
334
+ return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
335
+ };
336
+ imports.wbg.__wbg_static_accessor_SELF_37c5d418e4bf5819 = function() {
337
+ const ret = typeof self === 'undefined' ? null : self;
338
+ return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
339
+ };
340
+ imports.wbg.__wbg_static_accessor_WINDOW_5de37043a91a9c40 = function() {
341
+ const ret = typeof window === 'undefined' ? null : window;
342
+ return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
343
+ };
344
+ imports.wbg.__wbg_subarray_aa9065fa9dc5df96 = function(arg0, arg1, arg2) {
345
+ const ret = arg0.subarray(arg1 >>> 0, arg2 >>> 0);
346
+ return ret;
347
+ };
348
+ imports.wbg.__wbg_versions_c71aa1626a93e0a1 = function(arg0) {
349
+ const ret = arg0.versions;
350
+ return ret;
351
+ };
352
+ imports.wbg.__wbindgen_error_new = function(arg0, arg1) {
353
+ const ret = new Error(getStringFromWasm0(arg0, arg1));
354
+ return ret;
355
+ };
356
+ imports.wbg.__wbindgen_init_externref_table = function() {
357
+ const table = wasm.__wbindgen_export_2;
358
+ const offset = table.grow(4);
359
+ table.set(0, undefined);
360
+ table.set(offset + 0, undefined);
361
+ table.set(offset + 1, null);
362
+ table.set(offset + 2, true);
363
+ table.set(offset + 3, false);
364
+ ;
365
+ };
366
+ imports.wbg.__wbindgen_is_function = function(arg0) {
367
+ const ret = typeof(arg0) === 'function';
368
+ return ret;
369
+ };
370
+ imports.wbg.__wbindgen_is_object = function(arg0) {
371
+ const val = arg0;
372
+ const ret = typeof(val) === 'object' && val !== null;
373
+ return ret;
374
+ };
375
+ imports.wbg.__wbindgen_is_string = function(arg0) {
376
+ const ret = typeof(arg0) === 'string';
377
+ return ret;
378
+ };
379
+ imports.wbg.__wbindgen_is_undefined = function(arg0) {
380
+ const ret = arg0 === undefined;
381
+ return ret;
382
+ };
383
+ imports.wbg.__wbindgen_memory = function() {
384
+ const ret = wasm.memory;
385
+ return ret;
386
+ };
387
+ imports.wbg.__wbindgen_string_new = function(arg0, arg1) {
388
+ const ret = getStringFromWasm0(arg0, arg1);
389
+ return ret;
390
+ };
391
+ imports.wbg.__wbindgen_throw = function(arg0, arg1) {
392
+ throw new Error(getStringFromWasm0(arg0, arg1));
393
+ };
394
+
395
+ return imports;
396
+ }
397
+
398
+ function __wbg_init_memory(imports, memory) {
399
+
400
+ }
401
+
402
+ function __wbg_finalize_init(instance, module) {
403
+ wasm = instance.exports;
404
+ __wbg_init.__wbindgen_wasm_module = module;
405
+ cachedDataViewMemory0 = null;
406
+ cachedUint8ArrayMemory0 = null;
407
+
408
+
409
+ wasm.__wbindgen_start();
410
+ return wasm;
411
+ }
412
+
413
+ function initSync(module) {
414
+ if (wasm !== undefined) return wasm;
415
+
416
+
417
+ if (typeof module !== 'undefined') {
418
+ if (Object.getPrototypeOf(module) === Object.prototype) {
419
+ ({module} = module)
420
+ } else {
421
+ console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
422
+ }
423
+ }
424
+
425
+ const imports = __wbg_get_imports();
426
+
427
+ __wbg_init_memory(imports);
428
+
429
+ if (!(module instanceof WebAssembly.Module)) {
430
+ module = new WebAssembly.Module(module);
431
+ }
432
+
433
+ const instance = new WebAssembly.Instance(module, imports);
434
+
435
+ return __wbg_finalize_init(instance, module);
436
+ }
437
+
438
+ async function __wbg_init(module_or_path) {
439
+ if (wasm !== undefined) return wasm;
440
+
441
+
442
+ if (typeof module_or_path !== 'undefined') {
443
+ if (Object.getPrototypeOf(module_or_path) === Object.prototype) {
444
+ ({module_or_path} = module_or_path)
445
+ } else {
446
+ console.warn('using deprecated parameters for the initialization function; pass a single object instead')
447
+ }
448
+ }
449
+
450
+ if (typeof module_or_path === 'undefined') {
451
+ module_or_path = new URL('m_bg.wasm', import.meta.url);
452
+ }
453
+ const imports = __wbg_get_imports();
454
+
455
+ if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) {
456
+ module_or_path = fetch(module_or_path);
457
+ }
458
+
459
+ __wbg_init_memory(imports);
460
+
461
+ const { instance, module } = await __wbg_load(await module_or_path, imports);
462
+
463
+ return __wbg_finalize_init(instance, module);
464
+ }
465
+
466
+ export { initSync };
467
+ export default __wbg_init;
build/m_bg.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f824eb6d35931342c341423cd0af0271fabc129b971828feb9d83aaeba132770
3
+ size 4985301
build/m_bg.wasm.d.ts ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export const memory: WebAssembly.Memory;
4
+ export const __wbg_model_free: (a: number, b: number) => void;
5
+ export const model_load: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => [number, number, number];
6
+ export const model_init_with_prompt: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: bigint) => [number, number, number, number];
7
+ export const model_next_token: (a: number) => [number, number, number, number];
8
+ export const main: (a: number, b: number) => number;
9
+ export const __wbindgen_exn_store: (a: number) => void;
10
+ export const __externref_table_alloc: () => number;
11
+ export const __wbindgen_export_2: WebAssembly.Table;
12
+ export const __wbindgen_free: (a: number, b: number, c: number) => void;
13
+ export const __wbindgen_malloc: (a: number, b: number) => number;
14
+ export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
15
+ export const __externref_table_dealloc: (a: number) => void;
16
+ export const __wbindgen_start: () => void;
index.html CHANGED
@@ -1,19 +1,359 @@
1
- <!doctype html>
2
  <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
  </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  <html>
2
+
3
+ <head>
4
+ <meta content="text/html;charset=utf-8" http-equiv="Content-Type" />
5
+ <title>Candle Qwen2.5 Instruct Rust/WASM</title>
6
+ </head>
7
+
8
+ <body></body>
9
+
 
 
 
 
 
 
 
 
10
  </html>
11
+
12
+ <!DOCTYPE html>
13
+ <html>
14
+
15
+ <head>
16
+ <meta charset="UTF-8" />
17
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
18
+ <link rel="stylesheet"
19
+ href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css" />
20
+ <style>
21
+ @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
22
+
23
+ html,
24
+ body {
25
+ font-family: "Source Sans 3", sans-serif;
26
+ }
27
+
28
+ code,
29
+ output,
30
+ select,
31
+ pre {
32
+ font-family: "Source Code Pro", monospace;
33
+ }
34
+ </style>
35
+ <style type="text/tailwindcss">
36
+ .link {
37
+ @apply underline hover:text-blue-500 hover:no-underline;
38
+ }
39
+ </style>
40
+ <script src="https://cdn.tailwindcss.com"></script>
41
+ <script type="module">
42
+ import snarkdown from "https://cdn.skypack.dev/snarkdown";
43
+ import hljs from "https://cdn.skypack.dev/highlight.js";
44
+ // models base url
45
/**
 * Builds the catalogue entry for one Qwen2.5-Instruct GGUF checkpoint.
 *
 * All URLs are derived from the same two tags so that the file name, the
 * weights URL and the tokenizer/config URLs can never drift apart.
 *
 * @param {string} paramsTag - Parameter-count tag as it appears in the repo name, e.g. "0.5" or "1.5".
 * @param {string} quantTag - Suffix of the GGUF file name, e.g. "q4_k_m", "q8_0" or "f16".
 * @param {string} size - Human-readable download size shown in the model picker.
 * @returns {{model_name: string, model_url: string, tokenizer_url: string, config_url: string, quantized: boolean, seq_len: number, size: string}}
 */
function qwenModelEntry(paramsTag, quantTag, size) {
  const repo = `https://huggingface.co/Qwen/Qwen2.5-${paramsTag}B-Instruct`;
  const modelName = `qwen2.5-${paramsTag}b-instruct-${quantTag}.gguf`;
  return {
    model_name: modelName,
    model_url: `${repo}-GGUF/resolve/main/${modelName}`,
    tokenizer_url: `${repo}/resolve/main/tokenizer.json`,
    config_url: `${repo}/resolve/main/config.json`,
    // Every entry is a GGUF file, so all of them are flagged as quantized.
    quantized: true,
    seq_len: 2048,
    size,
  };
}

// Selectable models, keyed by the id used for the <select> options and the
// `?model=` query parameter.
const MODELS = {
  qwen_25_05_instruct_q4k: qwenModelEntry("0.5", "q4_k_m", "491 MB"),
  qwen_25_05_instruct_q80: qwenModelEntry("0.5", "q8_0", "676 MB"),
  qwen_25_05_instruct_f16: qwenModelEntry("0.5", "f16", "1.27 GB"),
  qwen_25_15_instruct_q4k: qwenModelEntry("1.5", "q4_k_m", "1.12 GB"),
  qwen_25_15_instruct_q80: qwenModelEntry("1.5", "q8_0", "1.89 GB"),
  qwen_25_15_instruct_f16: qwenModelEntry("1.5", "f16", "3.56 GB"),
};
101
+
102
+ const qwenWorker = new Worker("./qwenWorker.js", {
103
+ type: "module",
104
+ });
105
/**
 * Sends one generation request to the shared `qwenWorker` and mirrors the
 * worker's progress messages into the page's output elements.
 *
 * @param {AbortController} controller - When its signal aborts, `{ command: "abort" }`
 *   is posted to the worker.
 * @returns {Promise<object>} Resolves with the data of the worker message whose
 *   status is "complete" or "aborted". Rejects when a worker message carries an
 *   `error` field, or when the selected model id is not present in `MODELS`.
 */
async function generateSequence(controller) {
  const getValue = (id) => document.querySelector(`#${id}`).value;

  const modelID = getValue("model");
  const model = MODELS[modelID];
  if (!model) {
    // Without this guard the property reads below fail with an opaque TypeError.
    throw new Error(`ERROR: unknown model "${modelID}".`);
  }
  const weightsURL = model.model_url;
  const tokenizerURL = model.tokenizer_url;
  const configURL = model.config_url;

  // Form values are forwarded exactly as read from the inputs (strings).
  const prompt = getValue("prompt").trim();
  const temperature = getValue("temperature");
  const topP = getValue("top-p");
  const repeatPenalty = getValue("repeat_penalty");
  const seed = getValue("seed");
  const maxSeqLen = getValue("max-seq");

  // Reflects one worker status message in the status/generation/counter outputs.
  function updateStatus(data) {
    const outStatus = document.querySelector("#output-status");
    const outGen = document.querySelector("#output-generation");
    const outCounter = document.querySelector("#output-counter");

    switch (data.status) {
      case "loading":
        outStatus.hidden = false;
        outStatus.textContent = data.message;
        outGen.hidden = true;
        outCounter.hidden = true;
        break;
      case "generating": {
        // Braces keep these bindings local to this case and stop them from
        // shadowing the outer `prompt`.
        const { sentence, tokensSec, totalTime } = data;
        outStatus.hidden = true;
        outCounter.hidden = false;
        outGen.hidden = false;
        outGen.innerHTML = snarkdown(sentence);
        outCounter.innerHTML = `${(totalTime / 1000).toFixed(
          2
        )}s (${tokensSec.toFixed(2)} tok/s)`;
        hljs.highlightAll();
        break;
      }
      case "complete":
        outStatus.hidden = true;
        outGen.hidden = false;
        break;
    }
  }

  return new Promise((resolve, reject) => {
    const handleAbort = () => {
      qwenWorker.postMessage({ command: "abort" });
    };
    // Detach both listeners once the request has settled so that a finished
    // run can neither react to later worker messages nor post a stray abort.
    const detach = () => {
      qwenWorker.removeEventListener("message", handleMessage);
      controller.signal.removeEventListener("abort", handleAbort);
    };
    const handleMessage = (event) => {
      const { status, error } = event.data;
      if (status) updateStatus(event.data);
      if (error) {
        detach();
        reject(new Error(`ERROR: ${error}. Please check available RAM in your local machine.`));
        return;
      }
      if (status === "aborted" || status === "complete") {
        detach();
        resolve(event.data);
      }
    };

    // Listen before posting so no reply can be missed.
    controller.signal.addEventListener("abort", handleAbort);
    qwenWorker.addEventListener("message", handleMessage);

    qwenWorker.postMessage({
      weightsURL,
      modelID,
      tokenizerURL,
      configURL,
      quantized: model.quantized,
      prompt,
      temp: temperature,
      top_p: topP,
      repeatPenalty,
      seed: seed,
      maxSeqLen,
      command: "start",
    });
  });
}
191
+
192
+ const form = document.querySelector("#form");
193
+ const prompt = document.querySelector("#prompt");
194
+ const clearBtn = document.querySelector("#clear-btn");
195
+ const runBtn = document.querySelector("#run");
196
+ const modelSelect = document.querySelector("#model");
197
+ let runController = new AbortController();
198
+ let isRunning = false;
199
+
200
// Fill the model picker from MODELS and preselect the model named by the
// `?model=` query parameter, falling back to the default entry.
document.addEventListener("DOMContentLoaded", () => {
  for (const [id, model] of Object.entries(MODELS)) {
    const option = document.createElement("option");
    option.value = id;
    option.innerText = `${model.model_name} (${model.size})`;
    modelSelect.appendChild(option);
  }
  const query = new URLSearchParams(window.location.search);
  const requestedID = query.get("model");
  // Only accept ids that exist in MODELS: assigning an unknown value leaves the
  // <select> blank, and the next run would then look up an undefined model.
  const isKnownModel =
    requestedID !== null &&
    Object.prototype.hasOwnProperty.call(MODELS, requestedID);
  modelSelect.value = isKnownModel ? requestedID : "qwen_25_05_instruct_q4k";
});
215
+
216
// When another model is picked: store its id in the URL, tell the parent
// window about the new query string, and adjust the max-sequence control.
modelSelect.addEventListener("change", (event) => {
  const params = new URLSearchParams(window.location.search);
  params.set("model", event.target.value);

  const nextURL = `${window.location.pathname}?${params}`;
  window.history.replaceState({}, "", nextURL);
  // The page may be embedded; the query string is posted to the parent window.
  window.parent.postMessage({ queryString: "?" + params }, "*");

  const selected = MODELS[event.target.value];
  const maxSeqInput = document.querySelector("#max-seq");
  maxSeqInput.max = selected.seq_len;
  // The element right after the control is reset to 512.
  maxSeqInput.nextElementSibling.value = 512;
});
229
+
230
// Submitting the form toggles generation: it stops a run in progress, or
// starts a new one and restores the idle state when that run settles.
form.addEventListener("submit", async (e) => {
  e.preventDefault();
  if (isRunning) {
    stopRunning();
    return;
  }

  startRunning();
  try {
    await generateSequence(runController);
  } catch (err) {
    // A rejected run used to escape as an unhandled rejection, so the message
    // never reached the user.
    console.error(err);
    const outStatus = document.querySelector("#output-status");
    if (outStatus) {
      outStatus.hidden = false;
      outStatus.textContent = err instanceof Error ? err.message : String(err);
    }
  } finally {
    // Always return to the idle state; previously a failure left the button
    // stuck on "Stop" with isRunning still true.
    stopRunning();
  }
});
240
+
241
/** Marks a generation as in progress and turns the run button into "Stop". */
function startRunning() {
  runBtn.textContent = "Stop";
  isRunning = true;
}
245
+
246
/**
 * Aborts the current run controller, installs a fresh one for the next run,
 * and puts the run button back into its idle "Run" state.
 */
function stopRunning() {
  const activeController = runController;
  activeController.abort();
  runController = new AbortController();
  isRunning = false;
  runBtn.textContent = "Run";
}
252
// Clear button: empty the prompt, hide the button itself, disable "Run" and
// stop any generation in progress.
clearBtn.addEventListener("click", (event) => {
  event.preventDefault();
  prompt.value = "";
  clearBtn.classList.add("invisible");
  runBtn.disabled = true;
  stopRunning();
});
259
// Typing in the prompt enables "Run" and shows the clear button only while
// the prompt contains text.
prompt.addEventListener("input", (event) => {
  runBtn.disabled = false;
  const hasText = event.target.value.length > 0;
  if (hasText) {
    clearBtn.classList.remove("invisible");
  } else {
    clearBtn.classList.add("invisible");
  }
});
267
+ </script>
268
+ </head>
269
+
270
+ <body class="container max-w-4xl mx-auto p-4 text-gray-800">
271
+ <main class="grid grid-cols-1 gap-8 relative">
272
+ <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
273
+ <div>
274
+ <h1 class="text-5xl font-bold">Candle Qwen2.5 Instruct</h1>
275
+ <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
276
+ <p class="max-w-lg">
277
+ The
278
+ <a href="https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e" class="link"
279
+ target="_blank">Qwen2.5</a>
280
+ models are state-of-the-art multi-language models ranging from 0.5B to 72B parameters.
281
+ Thanks to WebAssembly, you can try some of them directly in your browser, without an LLM server.
282
+ For more details on these models, please refer to the
283
+ <a href="https://arxiv.org/pdf/2412.15115" class="link" target="_blank">technical report</a>.
284
+ </p>
285
+ </div>
286
+ <div>
287
+ <p class="text-xs italic max-w-lg">
288
+ <b>Note:</b>
289
+ When first run, the app will download and cache the model, which could take a few minutes.
290
+ </p>
291
+ </div>
292
+ <div>
293
+ <label for="model" class="font-medium">Model Options: </label>
294
+ <select id="model" class="border-2 border-gray-500 rounded-md font-light"></select>
295
+ </div>
296
+ <form id="form" class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center">
297
+ <input type="submit" hidden />
298
+ <textarea type="text" id="prompt" class="font-light text-lg w-full px-3 py-2 mx-1 resize-none outline-none"
299
+ oninput="this.style.height = 0;this.style.height = this.scrollHeight + 'px'"
300
+ placeholder="Add your prompt here..."></textarea>
301
+ <button id="clear-btn">
302
+ <svg fill="none" xmlns="http://www.w3.org/2000/svg" width="40" viewBox="0 0 70 40">
303
+ <path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" />
304
+ <path d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1" opacity=".5" stroke="#1F2937" stroke-width="2" />
305
+ </svg>
306
+ </button>
307
+ <button id="run"
308
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed">
309
+ Run
310
+ </button>
311
+ </form>
312
+ <details>
313
+ <summary class="font-medium cursor-pointer">Advanced Options</summary>
314
+
315
+ <div class="grid grid-cols-3 max-w-md items-center gap-3 py-3">
316
+ <label class="text-sm font-medium" for="max-seq">Maximum length
317
+ </label>
318
+ <input type="range" id="max-seq" name="max-seq" min="1" max="2048" step="1" value="512"
319
+ oninput="this.nextElementSibling.value = Number(this.value)" />
320
+ <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
321
+ 512</output>
322
+ <label class="text-sm font-medium" for="temperature">Temperature</label>
323
+ <input type="range" id="temperature" name="temperature" min="0" max="2" step="0.01" value="0.00"
324
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
325
+ <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
326
+ 0.00</output>
327
+ <label class="text-sm font-medium" for="top-p">Top-p</label>
328
+ <input type="range" id="top-p" name="top-p" min="0" max="1" step="0.01" value="0.90"
329
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
330
+ <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
331
+ 0.90</output>
332
+ <label class="text-sm font-medium" for="repeat_penalty">Repeat Penalty</label>
333
+ <input type="range" id="repeat_penalty" name="repeat_penalty" min="1" max="2" step="0.01" value="1.10"
334
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
335
+ <output
336
+ class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">1.10</output>
337
+ <label class="text-sm font-medium" for="seed">Seed</label>
338
+ <input type="number" id="seed" name="seed" value="42"
339
+ class="font-light border border-gray-700 text-right rounded-md p-2" />
340
+ <button id="rand-seed" type="button"
341
+ onclick="document.querySelector('#seed').value = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER)"
342
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm">
343
+ Rand
344
+ </button>
345
+ </div>
346
+ </details>
347
+
348
+ <div>
349
+ <h3 class="font-medium">Generation:</h3>
350
+ <div class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2">
351
+ <div id="output-counter" hidden class="ml-auto font-semibold grid-rows-1"></div>
352
+ <p hidden id="output-generation" class="grid-rows-2 text-lg"></p>
353
+ <span id="output-status" class="m-auto font-light">No output yet</span>
354
+ </div>
355
+ </div>
356
+ </main>
357
+ </body>
358
+
359
+ </html>
qwenWorker.js ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import init, { Model } from "./build/m.js";
2
+
3
+ async function fetchArrayBuffer(url) {
4
+ const cacheName = "qwen-instruct-candle-cache";
5
+ const cache = await caches.open(cacheName);
6
+ const cachedResponse = await cache.match(url);
7
+ if (cachedResponse) {
8
+ console.log("load from cache");
9
+ const data = await cachedResponse.arrayBuffer();
10
+ return new Uint8Array(data);
11
+ }
12
+
13
+ console.log("load from huggingface");
14
+ const res = await fetch(url, { cache: "force-cache" });
15
+ cache.put(url, res.clone());
16
+ return new Uint8Array(await res.arrayBuffer());
17
+ }
18
+ async function concatenateArrayBuffers(urls) {
19
+ const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)));
20
+
21
+ let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0);
22
+ let concatenatedBuffer = new Uint8Array(totalLength);
23
+
24
+ let offset = 0;
25
+ arrayBuffers.forEach(buffer => {
26
+ concatenatedBuffer.set(new Uint8Array(buffer), offset);
27
+ offset += buffer.byteLength;
28
+ });
29
+ return concatenatedBuffer;
30
+ }
31
+
32
+ class Qwen {
33
+ static instance = {};
34
+
35
+ static async getInstance(
36
+ weightsURL,
37
+ modelID,
38
+ tokenizerURL,
39
+ configURL,
40
+ quantized
41
+ ) {
42
+ // load individual modelID only once
43
+ if (!this.instance[modelID]) {
44
+ await init();
45
+
46
+ self.postMessage({ status: "loading", message: "Loading Model" });
47
+ const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
48
+ await Promise.all([
49
+ weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
50
+ fetchArrayBuffer(tokenizerURL),
51
+ fetchArrayBuffer(configURL),
52
+ ]);
53
+
54
+ this.instance[modelID] = new Model(
55
+ weightsArrayU8,
56
+ tokenizerArrayU8,
57
+ configArrayU8,
58
+ quantized
59
+ );
60
+ }
61
+ return this.instance[modelID];
62
+ }
63
+ }
64
+
65
// AbortController of the most recently started generation; null until the
// first "start" command arrives.
let controller = null;

// Command dispatcher for the worker: "start" launches a generation with the
// message payload as its parameters, "abort" cancels the current one.
self.addEventListener("message", (event) => {
  if (event.data.command === "start") {
    controller = new AbortController();
    generate(event.data);
  } else if (event.data.command === "abort") {
    // An "abort" may arrive before any "start"; there is nothing to cancel
    // then, so ignore it instead of throwing on a null controller.
    if (controller) {
      controller.abort();
    }
  }
});
74
+
75
+ async function generate(data) {
76
+ const {
77
+ weightsURL,
78
+ modelID,
79
+ tokenizerURL,
80
+ configURL,
81
+ quantized,
82
+ prompt,
83
+ temp,
84
+ top_p,
85
+ repeatPenalty,
86
+ seed,
87
+ maxSeqLen,
88
+ } = data;
89
+ try {
90
+ self.postMessage({ status: "loading", message: "Starting Qwen" });
91
+ const model = await Qwen.getInstance(
92
+ weightsURL,
93
+ modelID,
94
+ tokenizerURL,
95
+ configURL,
96
+ quantized
97
+ );
98
+
99
+ self.postMessage({ status: "loading", message: "Initializing model" });
100
+ const promptString = `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
101
+ const firstToken = model.init_with_prompt(
102
+ promptString,
103
+ temp,
104
+ top_p,
105
+ repeatPenalty,
106
+ 64,
107
+ BigInt(seed)
108
+ );
109
+
110
+ let sentence = firstToken;
111
+ let maxTokens = maxSeqLen;
112
+ let startTime = performance.now();
113
+ let tokensCount = 0;
114
+ while (tokensCount < maxTokens) {
115
+ await new Promise(async (resolve) => {
116
+ if (controller && controller.signal.aborted) {
117
+ self.postMessage({
118
+ status: "aborted",
119
+ message: "Aborted",
120
+ output: sentence,
121
+ });
122
+ return;
123
+ }
124
+
125
+ const token = await model.next_token();
126
+
127
+ if (token === "<|im_end|>") {
128
+ self.postMessage({
129
+ status: "complete",
130
+ message: "complete",
131
+ output: sentence,
132
+ });
133
+ return;
134
+ }
135
+
136
+ const tokensSec =
137
+ ((tokensCount + 1) / (performance.now() - startTime)) * 1000;
138
+
139
+ sentence += token;
140
+
141
+ self.postMessage({
142
+ status: "generating",
143
+ message: "Generating token",
144
+ token: token,
145
+ sentence: sentence,
146
+ totalTime: performance.now() - startTime,
147
+ tokensSec,
148
+ prompt: prompt,
149
+ });
150
+ setTimeout(resolve, 0);
151
+ });
152
+ tokensCount++;
153
+ }
154
+ self.postMessage({
155
+ status: "complete",
156
+ message: "complete",
157
+ output: prompt + sentence,
158
+ });
159
+ } catch (e) {
160
+ self.postMessage({ error: e });
161
+ }
162
+ }
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }