fhsp93 committed on
Commit
e91a8d9
·
verified ·
1 Parent(s): e1318d7

Add 2 files

Browse files
Files changed (2) hide show
  1. README.md +7 -5
  2. index.html +591 -19
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Nocodeapprtest
3
- emoji: 🏆
4
- colorFrom: gray
5
- colorTo: indigo
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: nocodeapprtest
3
+ emoji: 🐳
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,591 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>LLM Fine-Tuning Data Processor</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <style>
10
+ .dropzone {
11
+ border: 2px dashed #cbd5e0;
12
+ transition: all 0.3s ease;
13
+ }
14
+ .dropzone.active {
15
+ border-color: #4299e1;
16
+ background-color: #ebf8ff;
17
+ }
18
+ .progress-bar {
19
+ transition: width 0.3s ease;
20
+ }
21
+ .text-input {
22
+ min-height: 150px;
23
+ }
24
+ .fade-in {
25
+ animation: fadeIn 0.3s ease-in;
26
+ }
27
+ @keyframes fadeIn {
28
+ from { opacity: 0; transform: translateY(10px); }
29
+ to { opacity: 1; transform: translateY(0); }
30
+ }
31
+ .custom-scrollbar::-webkit-scrollbar {
32
+ width: 8px;
33
+ }
34
+ .custom-scrollbar::-webkit-scrollbar-track {
35
+ background: #f1f1f1;
36
+ }
37
+ .custom-scrollbar::-webkit-scrollbar-thumb {
38
+ background: #cbd5e0;
39
+ border-radius: 4px;
40
+ }
41
+ .custom-scrollbar::-webkit-scrollbar-thumb:hover {
42
+ background: #a0aec0;
43
+ }
44
+ </style>
45
+ </head>
46
+ <body class="bg-gray-50 min-h-screen">
47
+ <div class="container mx-auto px-4 py-8 max-w-6xl">
48
+ <header class="mb-8 text-center">
49
+ <h1 class="text-3xl font-bold text-gray-800 mb-2">LLM Fine-Tuning Data Processor</h1>
50
+ <p class="text-gray-600">Transform raw text into optimized datasets for language model training</p>
51
+ </header>
52
+
53
+ <div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
54
+ <!-- Input Section -->
55
+ <div class="lg:col-span-2 bg-white rounded-lg shadow-md overflow-hidden">
56
+ <div class="p-6 border-b border-gray-200">
57
+ <h2 class="text-xl font-semibold text-gray-700">Data Input</h2>
58
+ </div>
59
+
60
+ <div class="p-6">
61
+ <div class="mb-6">
62
+ <div class="flex justify-between items-center mb-2">
63
+ <label class="block text-sm font-medium text-gray-700">Input Method</label>
64
+ </div>
65
+ <div class="flex space-x-4">
66
+ <button id="textTab" class="tab-btn active px-4 py-2 rounded-md bg-blue-100 text-blue-700 font-medium">Text Input</button>
67
+ <button id="fileTab" class="tab-btn px-4 py-2 rounded-md bg-gray-100 text-gray-700 font-medium">File Upload</button>
68
+ <button id="apiTab" class="tab-btn px-4 py-2 rounded-md bg-gray-100 text-gray-700 font-medium">API Fetch</button>
69
+ </div>
70
+ </div>
71
+
72
+ <!-- Text Input Panel: the visible label is bound to #textInput so assistive tech announces it -->
72
+ <div id="textPanel" class="input-panel active">
73
+ <label for="textInput" class="block text-sm font-medium text-gray-700 mb-2">Enter your text</label>
74
+ <textarea id="textInput" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-input" placeholder="Paste your text here or type directly..."></textarea>
75
+ <div class="flex justify-between items-center mt-2">
76
+ <span id="charCount" class="text-xs text-gray-500">0 characters</span>
77
+ <button type="button" id="clearTextBtn" class="text-xs text-red-500 hover:text-red-700">Clear</button>
78
+ </div>
79
+ </div>
81
+
82
+ <!-- File Upload Panel -->
83
+ <div id="filePanel" class="input-panel hidden">
84
+ <div id="dropzone" class="dropzone rounded-lg p-8 text-center cursor-pointer mb-4">
85
+ <i class="fas fa-cloud-upload-alt text-4xl text-gray-400 mb-3"></i>
86
+ <p class="text-gray-600 mb-1">Drag & drop files here or click to browse</p>
87
+ <p class="text-xs text-gray-500">Supports: .txt, .csv, .json, .pdf (max 10MB)</p>
88
+ <input type="file" id="fileInput" class="hidden" multiple accept=".txt,.csv,.json,.pdf">
89
+ </div>
90
+ <div id="fileList" class="hidden">
91
+ <h3 class="text-sm font-medium text-gray-700 mb-2">Selected Files:</h3>
92
+ <ul id="fileListItems" class="space-y-2"></ul>
93
+ </div>
94
+ </div>
95
+
96
+ <!-- API Panel: labels are bound to their fields; NOTE(review): the page script never references #fetchApiBtn, so the button is inert -->
97
+ <div id="apiPanel" class="input-panel hidden">
98
+ <div class="mb-4">
99
+ <label for="apiUrl" class="block text-sm font-medium text-gray-700 mb-2">API Endpoint</label>
100
+ <input type="url" id="apiUrl" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" placeholder="https://api.example.com/data">
101
+ </div>
102
+ <div class="mb-4">
103
+ <label for="apiHeaders" class="block text-sm font-medium text-gray-700 mb-2">Headers (JSON)</label>
104
+ <textarea id="apiHeaders" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 h-24" placeholder='{"Authorization": "Bearer token"}'></textarea>
105
+ </div>
106
+ <button type="button" id="fetchApiBtn" class="w-full bg-blue-600 hover:bg-blue-700 text-white py-2 px-4 rounded-lg transition duration-200">
107
+ Fetch Data
108
+ </button>
109
+ </div>
110
+ </div>
111
+ </div>
112
+
113
+ <!-- Processing Options -->
114
+ <div class="bg-white rounded-lg shadow-md overflow-hidden">
115
+ <div class="p-6 border-b border-gray-200">
116
+ <h2 class="text-xl font-semibold text-gray-700">Processing Options</h2>
117
+ </div>
118
+
119
+ <div class="p-6">
120
+ <div class="mb-6"> <!-- Output format picker; the label is bound to #outputFormat -->
121
+ <label for="outputFormat" class="block text-sm font-medium text-gray-700 mb-2">Output Format</label>
122
+ <select id="outputFormat" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
123
+ <option value="jsonl">JSON Lines (.jsonl)</option>
124
+ <option value="csv">CSV</option>
125
+ <option value="txt">Plain Text</option>
126
+ </select>
127
+ </div>
128
+
129
+ <div class="mb-6">
130
+ <label class="block text-sm font-medium text-gray-700 mb-2">Text Processing</label>
131
+ <div class="space-y-2">
132
+ <div class="flex items-center">
133
+ <input type="checkbox" id="removeEmptyLines" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" checked>
134
+ <label for="removeEmptyLines" class="ml-2 block text-sm text-gray-700">Remove empty lines</label>
135
+ </div>
136
+ <div class="flex items-center">
137
+ <input type="checkbox" id="normalizeWhitespace" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" checked>
138
+ <label for="normalizeWhitespace" class="ml-2 block text-sm text-gray-700">Normalize whitespace</label>
139
+ </div>
140
+ <div class="flex items-center">
141
+ <input type="checkbox" id="removeSpecialChars" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
142
+ <label for="removeSpecialChars" class="ml-2 block text-sm text-gray-700">Remove special characters</label>
143
+ </div>
144
+ <div class="flex items-center">
145
+ <input type="checkbox" id="lowercaseText" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
146
+ <label for="lowercaseText" class="ml-2 block text-sm text-gray-700">Convert to lowercase</label>
147
+ </div>
148
+ </div>
149
+ </div>
150
+
151
+ <div class="mb-6"> <!-- Chunking options; the "Chunking" label is bound to the #chunkSize number field -->
152
+ <label for="chunkSize" class="block text-sm font-medium text-gray-700 mb-2">Chunking</label>
153
+ <div class="flex items-center space-x-2">
154
+ <input type="number" id="chunkSize" class="w-20 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" value="512" min="64" max="4096">
155
+ <span class="text-sm text-gray-700">tokens per chunk</span>
156
+ </div>
157
+ <div class="mt-2 flex items-center">
158
+ <input type="checkbox" id="overlapChunks" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
159
+ <label for="overlapChunks" class="ml-2 block text-sm text-gray-700">Overlap chunks (25%)</label>
160
+ </div>
161
+ </div>
162
+
163
+ <div class="mb-6"> <!-- Metadata key/value rows; inputs and the icon-only remove button carry explicit accessible names -->
164
+ <label class="block text-sm font-medium text-gray-700 mb-2">Metadata</label>
165
+ <div id="metadataFields" class="space-y-2">
166
+ <div class="flex items-center space-x-2">
167
+ <input type="text" placeholder="Key" aria-label="Metadata key" class="flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
168
+ <input type="text" placeholder="Value" aria-label="Metadata value" class="flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
169
+ <button type="button" class="remove-metadata-btn text-red-500 hover:text-red-700" aria-label="Remove metadata field">
170
+ <i class="fas fa-times" aria-hidden="true"></i>
171
+ </button>
172
+ </div>
173
+ </div>
174
+ <button type="button" id="addMetadataBtn" class="mt-2 text-sm text-blue-600 hover:text-blue-800 flex items-center">
175
+ <i class="fas fa-plus mr-1" aria-hidden="true"></i> Add Metadata Field
176
+ </button>
177
+ </div>
178
+
179
+ <button id="processBtn" class="w-full bg-blue-600 hover:bg-blue-700 text-white py-3 px-4 rounded-lg font-medium transition duration-200 flex items-center justify-center">
180
+ <i class="fas fa-cog mr-2"></i> Process Data
181
+ </button>
182
+ </div>
183
+ </div>
184
+ </div>
185
+
186
+ <!-- Results Section -->
187
+ <div id="resultsSection" class="mt-8 bg-white rounded-lg shadow-md overflow-hidden hidden">
188
+ <div class="p-6 border-b border-gray-200 flex justify-between items-center">
189
+ <h2 class="text-xl font-semibold text-gray-700">Processing Results</h2>
190
+ <div class="flex space-x-2">
191
+ <button id="downloadBtn" class="bg-green-600 hover:bg-green-700 text-white py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
192
+ <i class="fas fa-download mr-2"></i> Download
193
+ </button>
194
+ <button id="copyBtn" class="bg-gray-200 hover:bg-gray-300 text-gray-800 py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
195
+ <i class="fas fa-copy mr-2"></i> Copy
196
+ </button>
197
+ <button id="clearResultsBtn" class="bg-gray-200 hover:bg-gray-300 text-gray-800 py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
198
+ <i class="fas fa-trash-alt mr-2"></i> Clear
199
+ </button>
200
+ </div>
201
+ </div>
202
+
203
+ <div class="p-6">
204
+ <div id="statsContainer" class="mb-6 grid grid-cols-1 md:grid-cols-3 gap-4">
205
+ <div class="bg-blue-50 p-4 rounded-lg">
206
+ <p class="text-sm text-blue-700 font-medium">Total Chunks</p>
207
+ <p id="totalChunks" class="text-2xl font-bold text-blue-900">0</p>
208
+ </div>
209
+ <div class="bg-green-50 p-4 rounded-lg">
210
+ <p class="text-sm text-green-700 font-medium">Total Tokens</p>
211
+ <p id="totalTokens" class="text-2xl font-bold text-green-900">0</p>
212
+ </div>
213
+ <div class="bg-purple-50 p-4 rounded-lg">
214
+ <p class="text-sm text-purple-700 font-medium">Processing Time</p>
215
+ <p id="processingTime" class="text-2xl font-bold text-purple-900">0ms</p>
216
+ </div>
217
+ </div>
218
+
219
+ <div class="mb-4">
220
+ <label class="block text-sm font-medium text-gray-700 mb-2">Preview (first 3 items)</label>
221
+ <div id="resultsPreview" class="bg-gray-50 p-4 rounded-lg overflow-y-auto max-h-96 custom-scrollbar text-sm font-mono text-gray-800"></div>
222
+ </div>
223
+
224
+ <div id="progressContainer" class="hidden">
225
+ <div class="flex justify-between mb-1">
226
+ <span class="text-sm font-medium text-gray-700">Processing</span>
227
+ <span id="progressPercent" class="text-sm font-medium text-gray-700">0%</span>
228
+ </div>
229
+ <div class="w-full bg-gray-200 rounded-full h-2.5">
230
+ <div id="progressBar" class="progress-bar bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
231
+ </div>
232
+ </div>
233
+ </div>
234
+ </div>
235
+ </div>
236
+
237
+ <script>
238
+ document.addEventListener('DOMContentLoaded', function() {
239
+ // UI Elements
240
+ const textTab = document.getElementById('textTab');
241
+ const fileTab = document.getElementById('fileTab');
242
+ const apiTab = document.getElementById('apiTab');
243
+ const textPanel = document.getElementById('textPanel');
244
+ const filePanel = document.getElementById('filePanel');
245
+ const apiPanel = document.getElementById('apiPanel');
246
+ const textInput = document.getElementById('textInput');
247
+ const charCount = document.getElementById('charCount');
248
+ const clearTextBtn = document.getElementById('clearTextBtn');
249
+ const dropzone = document.getElementById('dropzone');
250
+ const fileInput = document.getElementById('fileInput');
251
+ const fileList = document.getElementById('fileList');
252
+ const fileListItems = document.getElementById('fileListItems');
253
+ const processBtn = document.getElementById('processBtn');
254
+ const resultsSection = document.getElementById('resultsSection');
255
+ const resultsPreview = document.getElementById('resultsPreview');
256
+ const totalChunks = document.getElementById('totalChunks');
257
+ const totalTokens = document.getElementById('totalTokens');
258
+ const processingTime = document.getElementById('processingTime');
259
+ const downloadBtn = document.getElementById('downloadBtn');
260
+ const copyBtn = document.getElementById('copyBtn');
261
+ const clearResultsBtn = document.getElementById('clearResultsBtn');
262
+ const progressContainer = document.getElementById('progressContainer');
263
+ const progressBar = document.getElementById('progressBar');
264
+ const progressPercent = document.getElementById('progressPercent');
265
+ const addMetadataBtn = document.getElementById('addMetadataBtn');
266
+ const metadataFields = document.getElementById('metadataFields');
267
+
268
+ // Tab switching
269
+ function switchTab(activeTab, activePanel) {
270
+ // Reset all tabs and panels
271
+ document.querySelectorAll('.tab-btn').forEach(btn => {
272
+ btn.classList.remove('active');
273
+ btn.classList.add('bg-gray-100', 'text-gray-700');
274
+ btn.classList.remove('bg-blue-100', 'text-blue-700');
275
+ });
276
+
277
+ document.querySelectorAll('.input-panel').forEach(panel => {
278
+ panel.classList.add('hidden');
279
+ panel.classList.remove('active');
280
+ });
281
+
282
+ // Activate selected tab and panel
283
+ activeTab.classList.add('active');
284
+ activeTab.classList.remove('bg-gray-100', 'text-gray-700');
285
+ activeTab.classList.add('bg-blue-100', 'text-blue-700');
286
+
287
+ activePanel.classList.remove('hidden');
288
+ activePanel.classList.add('active');
289
+ }
290
+
291
+ textTab.addEventListener('click', () => switchTab(textTab, textPanel));
292
+ fileTab.addEventListener('click', () => switchTab(fileTab, filePanel));
293
+ apiTab.addEventListener('click', () => switchTab(apiTab, apiPanel));
294
+
295
+ // Text input handling
296
+ textInput.addEventListener('input', function() {
297
+ const count = this.value.length;
298
+ charCount.textContent = `${count} characters`;
299
+ });
300
+
301
+ clearTextBtn.addEventListener('click', function() {
302
+ textInput.value = '';
303
+ charCount.textContent = '0 characters';
304
+ });
305
+
306
+ // File upload handling
307
+ dropzone.addEventListener('click', function() {
308
+ fileInput.click();
309
+ });
310
+
311
+ ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
312
+ dropzone.addEventListener(eventName, preventDefaults, false);
313
+ });
314
+
315
+ function preventDefaults(e) {
316
+ e.preventDefault();
317
+ e.stopPropagation();
318
+ }
319
+
320
+ ['dragenter', 'dragover'].forEach(eventName => {
321
+ dropzone.addEventListener(eventName, highlight, false);
322
+ });
323
+
324
+ ['dragleave', 'drop'].forEach(eventName => {
325
+ dropzone.addEventListener(eventName, unhighlight, false);
326
+ });
327
+
328
+ function highlight() {
329
+ dropzone.classList.add('active');
330
+ }
331
+
332
+ function unhighlight() {
333
+ dropzone.classList.remove('active');
334
+ }
335
+
336
+ dropzone.addEventListener('drop', handleDrop, false);
337
+
338
+ function handleDrop(e) {
339
+ const dt = e.dataTransfer;
340
+ const files = dt.files;
341
+ handleFiles(files);
342
+ }
343
+
344
+ fileInput.addEventListener('change', function() {
345
+ handleFiles(this.files);
346
+ });
347
+
348
+ function handleFiles(files) {
349
+ if (files.length === 0) return;
350
+
351
+ fileList.classList.remove('hidden');
352
+ fileListItems.innerHTML = '';
353
+
354
+ for (let i = 0; i < files.length; i++) {
355
+ const file = files[i];
356
+ const listItem = document.createElement('li');
357
+ listItem.className = 'flex items-center justify-between bg-gray-50 p-2 rounded';
358
+
359
+ const fileInfo = document.createElement('div');
360
+ fileInfo.className = 'flex items-center';
361
+
362
+ const icon = document.createElement('i');
363
+ icon.className = 'fas fa-file-alt text-gray-500 mr-2';
364
+
365
+ const fileName = document.createElement('span');
366
+ fileName.className = 'text-sm text-gray-700';
367
+ fileName.textContent = file.name;
368
+
369
+ const fileSize = document.createElement('span');
370
+ fileSize.className = 'text-xs text-gray-500 ml-2';
371
+ fileSize.textContent = formatFileSize(file.size);
372
+
373
+ const removeBtn = document.createElement('button');
374
+ removeBtn.className = 'text-red-500 hover:text-red-700 ml-2';
375
+ removeBtn.innerHTML = '<i class="fas fa-times"></i>';
376
+ removeBtn.addEventListener('click', function() {
377
+ listItem.remove();
378
+ if (fileListItems.children.length === 0) {
379
+ fileList.classList.add('hidden');
380
+ }
381
+ });
382
+
383
+ fileInfo.appendChild(icon);
384
+ fileInfo.appendChild(fileName);
385
+ fileInfo.appendChild(fileSize);
386
+
387
+ listItem.appendChild(fileInfo);
388
+ listItem.appendChild(removeBtn);
389
+
390
+ fileListItems.appendChild(listItem);
391
+ }
392
+ }
393
+
394
+ function formatFileSize(bytes) {
395
+ if (bytes === 0) return '0 Bytes';
396
+ const k = 1024;
397
+ const sizes = ['Bytes', 'KB', 'MB', 'GB'];
398
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
399
+ return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
400
+ }
401
+
402
+ // Metadata fields handling
403
+ addMetadataBtn.addEventListener('click', function() {
404
+ const fieldDiv = document.createElement('div');
405
+ fieldDiv.className = 'flex items-center space-x-2 fade-in';
406
+
407
+ const keyInput = document.createElement('input');
408
+ keyInput.type = 'text';
409
+ keyInput.placeholder = 'Key';
410
+ keyInput.className = 'flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500';
411
+
412
+ const valueInput = document.createElement('input');
413
+ valueInput.type = 'text';
414
+ valueInput.placeholder = 'Value';
415
+ valueInput.className = 'flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500';
416
+
417
+ const removeBtn = document.createElement('button');
418
+ removeBtn.className = 'remove-metadata-btn text-red-500 hover:text-red-700';
419
+ removeBtn.innerHTML = '<i class="fas fa-times"></i>';
420
+ removeBtn.addEventListener('click', function() {
421
+ fieldDiv.remove();
422
+ });
423
+
424
+ fieldDiv.appendChild(keyInput);
425
+ fieldDiv.appendChild(valueInput);
426
+ fieldDiv.appendChild(removeBtn);
427
+
428
+ metadataFields.appendChild(fieldDiv);
429
+ });
430
+
431
+ // Processing function
432
+ processBtn.addEventListener('click', async function() {
433
+ // Show loading state
434
+ processBtn.disabled = true;
435
+ processBtn.innerHTML = '<i class="fas fa-spinner fa-spin mr-2"></i> Processing...';
436
+ progressContainer.classList.remove('hidden');
437
+
438
+ // Simulate processing (in a real app, this would be actual processing)
439
+ let progress = 0;
440
+ const interval = setInterval(() => {
441
+ progress += 5;
442
+ progressBar.style.width = `${progress}%`;
443
+ progressPercent.textContent = `${progress}%`;
444
+
445
+ if (progress >= 100) {
446
+ clearInterval(interval);
447
+ setTimeout(() => {
448
+ finishProcessing();
449
+ }, 500);
450
+ }
451
+ }, 100);
452
+ });
453
+
454
+ function finishProcessing() {
455
+ // Hide progress bar
456
+ progressContainer.classList.add('hidden');
457
+
458
+ // Generate sample results (in a real app, this would be actual processed data)
459
+ const outputFormat = document.getElementById('outputFormat').value;
460
+ const sampleData = generateSampleData(outputFormat);
461
+
462
+ // Update results UI
463
+ resultsPreview.textContent = sampleData.preview;
464
+ totalChunks.textContent = sampleData.stats.chunks;
465
+ totalTokens.textContent = sampleData.stats.tokens;
466
+ processingTime.textContent = sampleData.stats.time + 'ms';
467
+
468
+ // Show results section
469
+ resultsSection.classList.remove('hidden');
470
+ resultsSection.scrollIntoView({ behavior: 'smooth' });
471
+
472
+ // Reset process button
473
+ processBtn.disabled = false;
474
+ processBtn.innerHTML = '<i class="fas fa-cog mr-2"></i> Process Data';
475
+
476
+ // Set up download and copy buttons
477
+ setupDownloadAndCopy(sampleData.fullData, outputFormat);
478
+ }
479
+
480
+ function generateSampleData(format) {
481
+ const chunks = Math.floor(Math.random() * 50) + 10;
482
+ const tokens = chunks * (Math.floor(Math.random() * 200) + 300);
483
+ const time = Math.floor(Math.random() * 2000) + 500;
484
+
485
+ let preview = '';
486
+ let fullData = '';
487
+
488
+ if (format === 'jsonl') {
489
+ for (let i = 0; i < 3; i++) {
490
+ preview += JSON.stringify({
491
+ text: `This is sample text chunk ${i+1} for fine-tuning your LLM. It contains natural language that would be useful for training.`,
492
+ metadata: {
493
+ source: "sample",
494
+ length: Math.floor(Math.random() * 100) + 50
495
+ }
496
+ }, null, 2) + '\n\n';
497
+ }
498
+
499
+ // Generate full data
500
+ for (let i = 0; i < chunks; i++) {
501
+ fullData += JSON.stringify({
502
+ text: `This is sample text chunk ${i+1} for fine-tuning your LLM. It contains natural language that would be useful for training.`,
503
+ metadata: {
504
+ source: "sample",
505
+ length: Math.floor(Math.random() * 100) + 50
506
+ }
507
+ }) + '\n';
508
+ }
509
+ } else if (format === 'csv') {
510
+ preview = 'text,source,length\n';
511
+ for (let i = 0; i < 3; i++) {
512
+ preview += `"This is sample text chunk ${i+1} for fine-tuning your LLM.","sample",${Math.floor(Math.random() * 100) + 50}\n`;
513
+ }
514
+
515
+ // Generate full data
516
+ fullData = 'text,source,length\n';
517
+ for (let i = 0; i < chunks; i++) {
518
+ fullData += `"This is sample text chunk ${i+1} for fine-tuning your LLM.","sample",${Math.floor(Math.random() * 100) + 50}\n`;
519
+ }
520
+ } else {
521
+ for (let i = 0; i < 3; i++) {
522
+ preview += `This is sample text chunk ${i+1} for fine-tuning your LLM.\n\n`;
523
+ }
524
+
525
+ // Generate full data
526
+ for (let i = 0; i < chunks; i++) {
527
+ fullData += `This is sample text chunk ${i+1} for fine-tuning your LLM.\n\n`;
528
+ }
529
+ }
530
+
531
+ return {
532
+ preview: preview.trim(),
533
+ fullData: fullData.trim(),
534
+ stats: {
535
+ chunks: chunks,
536
+ tokens: tokens,
537
+ time: time
538
+ }
539
+ };
540
+ }
541
+
542
+ function setupDownloadAndCopy(data, format) {
543
+ let mimeType, extension;
544
+
545
+ switch(format) {
546
+ case 'jsonl':
547
+ mimeType = 'application/json';
548
+ extension = 'jsonl';
549
+ break;
550
+ case 'csv':
551
+ mimeType = 'text/csv';
552
+ extension = 'csv';
553
+ break;
554
+ default:
555
+ mimeType = 'text/plain';
556
+ extension = 'txt';
557
+ }
558
+
559
+ // Download button
560
+ downloadBtn.onclick = function() {
561
+ const blob = new Blob([data], { type: mimeType });
562
+ const url = URL.createObjectURL(blob);
563
+ const a = document.createElement('a');
564
+ a.href = url;
565
+ a.download = `llm_dataset_${new Date().toISOString().slice(0,10)}.${extension}`;
566
+ document.body.appendChild(a);
567
+ a.click();
568
+ document.body.removeChild(a);
569
+ URL.revokeObjectURL(url);
570
+ };
571
+
572
+ // Copy button
573
+ copyBtn.onclick = function() {
574
+ navigator.clipboard.writeText(data).then(() => {
575
+ const originalText = copyBtn.innerHTML;
576
+ copyBtn.innerHTML = '<i class="fas fa-check mr-2"></i> Copied!';
577
+ setTimeout(() => {
578
+ copyBtn.innerHTML = originalText;
579
+ }, 2000);
580
+ });
581
+ };
582
+ }
583
+
584
+ // Clear results
585
+ clearResultsBtn.addEventListener('click', function() {
586
+ resultsSection.classList.add('hidden');
587
+ });
588
+ });
589
+ </script>
590
+ <!-- DeepSite attribution badge; new-tab links carry rel="noopener noreferrer" --><p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - <a href="https://enzostvs-deepsite.hf.space?remix=fhsp93/nocodeapprtest" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">🧬 Remix</a></p></body>
591
+ </html>