Fraser committed on
Commit
c7f2c59
·
1 Parent(s): 06e1be5

proto-what now?

Browse files
Files changed (2) hide show
  1. prototype_index.html +14 -0
  2. prototype_web_chat.html +278 -0
prototype_index.html CHANGED
@@ -303,6 +303,19 @@
303
  resultDiv.innerHTML = "Generating image…";
304
 
305
  try {
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  const output = await flux.predict("/infer", [
307
  prompt,
308
  seed,
@@ -312,6 +325,7 @@
312
  steps
313
  ]);
314
 
 
315
  const [image, usedSeed] = output.data;
316
  let url;
317
  if (typeof image === "string") url = image;
 
303
  resultDiv.innerHTML = "Generating image…";
304
 
305
  try {
306
+ console.log("=== CALLING FLUX ===");
307
+ console.log("Parameters:", { prompt, seed, randomize, width, height, steps });
308
+
309
+ // Check Space status first
310
+ console.log("Checking FLUX Space status...");
311
+ try {
312
+ const apiInfo = await flux.view_api();
313
+ console.log("FLUX Space API info:", apiInfo);
314
+ } catch (apiErr) {
315
+ console.warn("Could not get FLUX API info:", apiErr);
316
+ }
317
+
318
+ console.log("Calling flux.predict...");
319
  const output = await flux.predict("/infer", [
320
  prompt,
321
  seed,
 
325
  steps
326
  ]);
327
 
328
+ console.log("FLUX Raw result:", output);
329
  const [image, usedSeed] = output.data;
330
  let url;
331
  if (typeof image === "string") url = image;
prototype_web_chat.html ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>Browser LLM (WASM, mobile)</title>
7
+ <style>
8
+ :root { --bg:#0b0d10; --card:#14171b; --muted:#9aa4af; --accent:#6ee7b7; --danger:#f87171; --text:#dce3ea; }
9
+ * { box-sizing:border-box; }
10
+ body { margin:0; background:var(--bg); color:var(--text); font:16px/1.45 system-ui, -apple-system, Segoe UI, Roboto, "Helvetica Neue", Arial, "Apple Color Emoji","Segoe UI Emoji"; }
11
+ header { padding:14px 16px; border-bottom:1px solid #21262c; display:flex; gap:10px; align-items:center; }
12
+ header h1 { font-size:16px; margin:0; font-weight:600; }
13
+ header .pill { font-size:12px; color:var(--bg); background:var(--accent); padding:.2rem .55rem; border-radius:999px; font-weight:700; letter-spacing:.02em; }
14
+ main { display:grid; grid-template-rows:auto 1fr auto; height:calc(100dvh - 58px); }
15
+ .bar { display:flex; flex-wrap:wrap; gap:8px; padding:10px 12px; background:#0f1216; border-bottom:1px solid #21262c; align-items:center; }
16
+ select, input[type="number"] { background:var(--card); color:var(--text); border:1px solid #29313a; border-radius:10px; padding:8px 10px; }
17
+ button { background:#1c2128; color:var(--text); border:1px solid #2a323c; border-radius:12px; padding:10px 12px; font-weight:600; cursor:pointer; }
18
+ button.primary { background:var(--accent); color:#08261b; border:none; }
19
+ button.ghost { background:transparent; border-color:#2a323c; }
20
+ button:disabled { opacity:.6; cursor:not-allowed; }
21
+ .grow { flex:1 1 auto; }
22
+ .progress { width:160px; height:8px; background:#1a1f25; border-radius:999px; overflow:hidden; border:1px solid #25303a; }
23
+ .progress > i { display:block; height:100%; width:0%; background:linear-gradient(90deg,#34d399,#10b981); transition:width .25s ease; }
24
+ #stats { font-size:12px; color:var(--muted); display:flex; gap:10px; align-items:center; }
25
+ #chat { padding:14px; overflow:auto; background:linear-gradient(#0b0d10, #0d1117); }
26
+ .msg { max-width:820px; margin:0 auto 10px auto; display:flex; gap:10px; align-items:flex-start; }
27
+ .msg .bubble { background:var(--card); padding:12px 14px; border-radius:16px; border:1px solid #242c35; white-space:pre-wrap; }
28
+ .msg.user .bubble { background:#1d2330; }
29
+ .msg.assistant .bubble { background:#151c24; }
30
+ .role { font-size:12px; color:var(--muted); min-width:68px; text-transform:uppercase; letter-spacing:.04em; }
31
+ .inputbar { display:flex; gap:8px; padding:10px; border-top:1px solid #21262c; background:#0f1216; }
32
+ textarea { resize:none; height:64px; padding:10px 12px; flex:1 1 auto; border-radius:12px; border:1px solid #2a323c; background:var(--card); color:var(--text); }
33
+ .tiny { font-size:12px; color:var(--muted); }
34
+ .warn { color:var(--danger); font-weight:600; }
35
+ .row { display:flex; gap:8px; align-items:center; flex-wrap:wrap; }
36
+ .spacer { flex:1; }
37
+ a { color:#93c5fd; }
38
+ </style>
39
+ </head>
40
+ <body>
41
+ <header>
42
+ <h1>Browser LLM</h1>
43
+ <span class="pill">WASM • CPU-only</span>
44
+ <span id="isoNote" class="tiny"></span>
45
+ </header>
46
+
47
+ <main>
48
+ <div class="bar">
49
+ <label>Model:</label>
50
+ <select id="model">
51
+ <!-- Public Hugging Face GGUFs (no hosting needed) -->
52
+ <option value='{"id":"QuantFactory/SmolLM2-360M-GGUF","file":"SmolLM2-360M.Q4_0.gguf","label":"SmolLM2-360M Q4_0 (≈229 MB)"}'>
53
+ SmolLM2-360M Q4_0 (≈229 MB)
54
+ </option>
55
+ <option value='{"id":"QuantFactory/SmolLM2-360M-GGUF","file":"SmolLM2-360M.Q3_K_S.gguf","label":"SmolLM2-360M Q3_K_S (≈219 MB, faster)"}'>
56
+ SmolLM2-360M Q3_K_S (≈219 MB, faster)
57
+ </option>
58
+ <option value='{"id":"QuantFactory/SmolLM2-360M-GGUF","file":"SmolLM2-360M.Q2_K.gguf","label":"SmolLM2-360M Q2_K (≈200 MB, min RAM / quality drop)"}'>
59
+ SmolLM2-360M Q2_K (≈200 MB, min RAM / quality drop)
60
+ </option>
61
+ </select>
62
+
63
+ <div class="row">
64
+ <label>Max new tokens</label>
65
+ <input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
66
+ </div>
67
+ <div class="row">
68
+ <label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
69
+ <label>Top-p</label><input id="topp" type="number" min="0" max="1" step="0.05" value="0.9" style="width:80px" />
70
+ <label>Top-k</label><input id="topk" type="number" min="1" max="100" step="1" value="40" style="width:80px" />
71
+ </div>
72
+
73
+ <div class="spacer"></div>
74
+
75
+ <button id="loadBtn" class="primary">Load model</button>
76
+ <button id="unloadBtn" class="ghost" disabled>Unload</button>
77
+
78
+ <div class="progress" title="download progress"><i id="prog"></i></div>
79
+ <div id="stats">idle</div>
80
+ </div>
81
+
82
+ <div id="chat" aria-live="polite"></div>
83
+
84
+ <form class="inputbar" id="form">
85
+ <textarea id="input" placeholder="Ask me anything…" required></textarea>
86
+ <div class="row" style="flex-direction:column; gap:6px; align-items:flex-end">
87
+ <button id="sendBtn" class="primary">Send</button>
88
+ <button id="stopBtn" type="button" class="ghost" disabled>Stop</button>
89
+ <div class="tiny">Context kept small for mobile perf</div>
90
+ </div>
91
+ </form>
92
+ </main>
93
+
94
+ <script type="module">
95
+ // ——— Fixed imports (pin version + explicit wasm paths) ———
96
+ import { Wllama, LoggerWithoutDebug } from "https://cdn.jsdelivr.net/npm/@wllama/[email protected]/esm/index.js";
97
+
98
+ // Provide the wasm URLs directly so there is no "+esm" indirection.
99
+ const CONFIG_PATHS = {
100
+ "single-thread/wllama.wasm": "https://cdn.jsdelivr.net/npm/@wllama/[email protected]/esm/single-thread/wllama.wasm",
101
+ "multi-thread/wllama.wasm" : "https://cdn.jsdelivr.net/npm/@wllama/[email protected]/esm/multi-thread/wllama.wasm",
102
+ };
103
+
104
+ // ——— DOM refs ———
105
+ const $model = document.getElementById('model');
106
+ const $load = document.getElementById('loadBtn');
107
+ const $unload= document.getElementById('unloadBtn');
108
+ const $prog = document.getElementById('prog');
109
+ const $stats = document.getElementById('stats');
110
+ const $chat = document.getElementById('chat');
111
+ const $form = document.getElementById('form');
112
+ const $input = document.getElementById('input');
113
+ const $send = document.getElementById('sendBtn');
114
+ const $stop = document.getElementById('stopBtn');
115
+ const $iso = document.getElementById('isoNote');
116
+
117
+ // ——— State ———
118
+ const decoder = new TextDecoder();
119
+ const wllama = new Wllama(CONFIG_PATHS, { logger: LoggerWithoutDebug });
120
+ let aborter = null;
121
+ let loaded = false;
122
+ let eotToken = -1;
123
+ let sysPrompt = "You are a helpful, concise assistant. Keep answers short and clear.";
124
+
125
+ // Keep RAM low for mobile: small context + q4_0 (4-bit) K cache; the V cache stays f16
126
/**
 * Mirrors model-download progress onto the toolbar progress bar.
 * Shows 0% whenever the total size is missing or not positive.
 */
const reportDownloadProgress = ({ loaded: received, total }) => {
  const percent = total > 0 ? Math.round((received / total) * 100) : 0;
  $prog.style.width = `${percent}%`;
};

/** Options handed to the model loader; sized conservatively for mobile devices. */
const LOAD_CONFIG = {
  n_ctx: 768,
  n_batch: 48,
  cache_type_k: "q4_0",
  // WASM-safe value for the V cache (do NOT set q4_0 here).
  cache_type_v: "f16",
  // WASM: flash attention unavailable.
  flash_attn: false,
  progressCallback: reportDownloadProgress,
};
137
+
138
+ const messages = [ { role: "system", content: sysPrompt } ];
139
+
140
+ // ——— UI helpers ———
141
/** Small helpers for rendering chat rows and the status line. */
const ui = {
  /**
   * Appends a chat row and scrolls the transcript to the bottom.
   *
   * The row is assembled from DOM nodes and `textContent` only, so neither
   * `role` nor `text` is ever parsed as HTML.
   *
   * @param {string} role - Row kind; used as a CSS class and as the visible label.
   * @param {string} text - Initial bubble text.
   * @returns {HTMLElement} The bubble element, so callers can stream text into it.
   */
  add(role, text) {
    const row = document.createElement('div');
    row.className = `msg ${role}`;

    const label = document.createElement('div');
    label.className = 'role';
    label.textContent = role;

    const bubble = document.createElement('div');
    bubble.className = 'bubble';
    bubble.textContent = text;

    row.append(label, bubble);
    $chat.appendChild(row);
    $chat.scrollTop = $chat.scrollHeight;
    return bubble;
  },

  /**
   * Replaces the status text shown in the toolbar.
   *
   * @param {string} txt - Status message.
   */
  setStats(txt) {
    $stats.textContent = txt;
  },
};
156
+
157
/**
 * Shows in the header whether the page is cross-origin isolated.
 *
 * The flag is read through `globalThis` so that environments which do not
 * define `crossOriginIsolated` are reported as single-thread instead of
 * throwing a ReferenceError. Both messages are plain text, so `textContent`
 * is used for both branches.
 */
function noteIsolation() {
  const isolated = Boolean(globalThis.crossOriginIsolated);
  $iso.textContent = isolated
    ? 'Cross-origin isolated: multithread on'
    : 'Single-thread mode (serve with COOP/COEP for multithread)';
}
164
+ noteIsolation();
165
+
166
/**
 * Trims the shared `messages` history in place.
 *
 * The first entry (the system message) is kept untouched, followed by at most
 * the last eight non-system entries, each clipped to roughly
 * `maxTokensRough` tokens. The system entry is excluded from the "recent
 * turns" window; otherwise a short history would re-append it on every call
 * and the system prompt would be duplicated.
 *
 * @param {number} [maxTokensRough=900] - Approximate per-message token budget.
 */
function truncateHistoryForMobile(maxTokensRough = 900) {
  const MAX_TURNS = 8;
  const maxChars = maxTokensRough * 4; // rough heuristic: ~4 characters per token

  // Over-long content keeps its tail, prefixed with an ellipsis.
  const clip = (s) => (s.length <= maxChars ? s : `…${s.slice(s.length - maxChars)}`);

  const [system, ...turns] = messages;
  const recent = turns
    .slice(-MAX_TURNS)
    .map(({ role, content }) => ({ role, content: clip(content) }));

  // Mutate in place: other code holds a reference to this same array.
  messages.length = 0;
  if (system) messages.push(system);
  messages.push(...recent);
}
174
+
175
// Load currently in progress, shared by concurrent callers (e.g. the "Load model"
// button and a form submit) so the model is only fetched once.
let pendingLoad = null;

/**
 * Makes sure the model selected in the dropdown is loaded.
 *
 * Resolves immediately when a model is already loaded and joins an in-flight
 * load instead of starting a second one.
 *
 * @returns {Promise<void>}
 * @throws Re-throws whatever the loader throws, after resetting the UI.
 */
async function ensureLoaded() {
  if (loaded) return;
  if (pendingLoad) return pendingLoad;

  pendingLoad = loadSelectedModel();
  try {
    await pendingLoad;
  } finally {
    pendingLoad = null;
  }
}

/** Fetches and initialises the selected model, then updates buttons and status text. */
async function loadSelectedModel() {
  const choice = JSON.parse($model.value);
  $prog.style.width = '0%';
  $load.disabled = true; // block repeat clicks while the download runs
  ui.setStats('Fetching model…');

  try {
    await wllama.loadModelFromHF(choice.id, choice.file, LOAD_CONFIG);
  } catch (err) {
    // Leave the UI retryable rather than stuck on "Fetching model…".
    $load.disabled = false;
    $prog.style.width = '0%';
    ui.setStats(`Load failed: ${err?.message ?? String(err)}`);
    throw err;
  }

  loaded = true;
  eotToken = wllama.getEOT();
  $unload.disabled = false;

  const meta = await wllama.getModelMetadata();
  const ctx = wllama.getLoadedContextInfo();
  const thr = wllama.getNumThreads?.() ?? 1;
  ui.setStats(`Loaded ${choice.file} • ${meta.n_params?.toLocaleString?.() || '≈360M'} params • ctx ${ctx.n_ctx} • threads ${thr}`);
}
189
+
190
/**
 * Unloads the model and returns the toolbar to its idle state.
 *
 * Any generation still streaming is aborted first. Shutting down stays
 * best-effort: a failure in `wllama.exit()` is logged, not swallowed silently,
 * and the UI is reset regardless so the user can load again.
 *
 * @returns {Promise<void>}
 */
async function unloadModel() {
  aborter?.abort();

  try {
    await wllama.exit();
  } catch (err) {
    console.warn('wllama.exit() failed; resetting UI anyway:', err);
  }

  loaded = false;
  $load.disabled = false;
  $unload.disabled = true;
  $prog.style.width = '0%';
  ui.setStats('idle');
}
197
+
198
+ // ——— Chat flow ———
199
// Toolbar buttons. The async handlers are wrapped so a rejected promise is
// reported instead of becoming an unhandled rejection, and the already-cached
// element references are reused rather than querying the DOM again.
$load.addEventListener('click', () => {
  ensureLoaded().catch((err) => {
    console.error('Model load failed:', err);
    ui.setStats(`Load failed: ${err?.message ?? String(err)}`);
  });
});
$unload.addEventListener('click', () => {
  unloadModel().catch((err) => console.error('Model unload failed:', err));
});
$stop.addEventListener('click', () => aborter?.abort());
202
+
203
/**
 * Reads a numeric form control, falling back when the field is empty or invalid.
 *
 * @param {string} id - Element id of the input.
 * @param {(raw: string) => number} parse - Parser applied to the raw value.
 * @param {number} fallback - Value used when parsing yields NaN/Infinity.
 * @returns {number}
 */
function readNumberInput(id, parse, fallback) {
  const value = parse(document.getElementById(id).value);
  return Number.isFinite(value) ? value : fallback;
}

/**
 * Appends an error line to a chat bubble using text nodes only, so the error
 * message is never parsed as HTML.
 *
 * @param {HTMLElement} bubble - Bubble to append to.
 * @param {unknown} err - The error to display.
 */
function appendErrorNote(bubble, err) {
  const note = document.createElement('span');
  note.className = 'warn';
  note.textContent = `Error: ${String(err)}`;
  bubble.append('\n\n', note);
}

// Submit: load the model if needed, then stream the assistant reply into a new bubble.
$form.addEventListener('submit', async (ev) => {
  ev.preventDefault();
  const text = ($input.value || '').trim();
  if (!text) return;

  // Disable Send before the (possibly long) model load so it cannot be re-submitted.
  $send.disabled = true;
  try {
    await ensureLoaded();
  } catch (err) {
    console.error(err);
    ui.setStats(`Load failed: ${err?.message ?? String(err)}`);
    $send.disabled = false;
    return;
  }

  messages.push({ role: 'user', content: text });
  ui.add('user', text);
  $input.value = '';

  const assistantBubble = ui.add('assistant', '');
  truncateHistoryForMobile(900);

  aborter = new AbortController();
  $stop.disabled = false; // generation can be cancelled from here on

  // Fallbacks mirror the default values declared on the inputs.
  const nPredict = readNumberInput('nPredict', (v) => Number.parseInt(v, 10), 128);
  const temp = readNumberInput('temp', Number.parseFloat, 0.7);
  const top_p = readNumberInput('topp', Number.parseFloat, 0.9);
  const top_k = readNumberInput('topk', (v) => Number.parseInt(v, 10), 40);

  const t0 = performance.now();
  // One streaming decoder per reply: a multi-byte UTF-8 character split across
  // two chunks is buffered instead of being decoded as replacement characters.
  const utf8 = new TextDecoder();
  let outText = '';

  try {
    const stream = await wllama.createChatCompletion(messages, {
      stream: true,
      useCache: true,
      nPredict,
      sampling: { temp, top_p, top_k },
      stopTokens: eotToken > 0 ? [eotToken] : undefined,
      abortSignal: aborter.signal
    });

    for await (const chunk of stream) {
      outText += utf8.decode(chunk.piece, { stream: true });
      assistantBubble.textContent = outText;
      $chat.scrollTop = $chat.scrollHeight;
    }
    outText += utf8.decode(); // flush any bytes still buffered
    assistantBubble.textContent = outText;

    const dt = (performance.now() - t0) / 1000;
    const tokSec = Math.max(1, Math.round(outText.length / 4)) / dt;
    ui.setStats(`gen: ${tokSec.toFixed(1)} tok/s (rough)`);
    messages.push({ role: 'assistant', content: outText });
  } catch (err) {
    if (err && err.name === 'AbortError') {
      assistantBubble.textContent += '\n\n[stopped]';
    } else {
      console.error(err);
      appendErrorNote(assistantBubble, err);
    }
  } finally {
    $send.disabled = false;
    $stop.disabled = true;
    aborter = null;
  }
});
260
+
261
+ // Enter-to-send on mobile; Shift+Enter for newline
262
$input.addEventListener('keydown', (e) => {
  // While an IME composition is active, Enter confirms the candidate text and
  // must not send the message. keyCode 229 covers browsers that report the
  // confirming keydown with isComposing already false.
  if (e.isComposing || e.keyCode === 229) return;
  if (e.key !== 'Enter' || e.shiftKey) return;

  e.preventDefault();
  // Clicking the button (not submitting the form directly) keeps the
  // disabled-while-generating guard in effect.
  $send.click();
});
268
+ </script>
269
+
270
+ <!--
271
+ Notes:
272
+ • Fixed the 404 by importing the explicit ESM entry and pointing to the exact wasm files on jsDelivr.
273
+ • Runs entirely on CPU via WebAssembly (no WebGPU). Works in single-thread everywhere; for multithread,
274
+ serve with COOP/COEP headers so SharedArrayBuffer is available.
275
+ • For even lower RAM or faster sampling, pick Q3_K_S or Q2_K in the dropdown, and keep n_ctx modest.
276
+ -->
277
+ </body>
278
+ </html>