Spaces:
Running
Running
Add 2 files
Browse files- README.md +7 -5
- index.html +591 -19
README.md
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: static
|
7 |
pinned: false
|
|
|
|
|
8 |
---
|
9 |
|
10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: nocodeapprtest
|
3 |
+
emoji: 🐳
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
sdk: static
|
7 |
pinned: false
|
8 |
+
tags:
|
9 |
+
- deepsite
|
10 |
---
|
11 |
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
index.html
CHANGED
@@ -1,19 +1,591 @@
|
|
1 |
-
<!
|
2 |
-
<html>
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>LLM Fine-Tuning Data Processor</title>
|
7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
8 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
9 |
+
<style>
|
10 |
+
.dropzone {
|
11 |
+
border: 2px dashed #cbd5e0;
|
12 |
+
transition: all 0.3s ease;
|
13 |
+
}
|
14 |
+
.dropzone.active {
|
15 |
+
border-color: #4299e1;
|
16 |
+
background-color: #ebf8ff;
|
17 |
+
}
|
18 |
+
.progress-bar {
|
19 |
+
transition: width 0.3s ease;
|
20 |
+
}
|
21 |
+
.text-input {
|
22 |
+
min-height: 150px;
|
23 |
+
}
|
24 |
+
.fade-in {
|
25 |
+
animation: fadeIn 0.3s ease-in;
|
26 |
+
}
|
27 |
+
@keyframes fadeIn {
|
28 |
+
from { opacity: 0; transform: translateY(10px); }
|
29 |
+
to { opacity: 1; transform: translateY(0); }
|
30 |
+
}
|
31 |
+
.custom-scrollbar::-webkit-scrollbar {
|
32 |
+
width: 8px;
|
33 |
+
}
|
34 |
+
.custom-scrollbar::-webkit-scrollbar-track {
|
35 |
+
background: #f1f1f1;
|
36 |
+
}
|
37 |
+
.custom-scrollbar::-webkit-scrollbar-thumb {
|
38 |
+
background: #cbd5e0;
|
39 |
+
border-radius: 4px;
|
40 |
+
}
|
41 |
+
.custom-scrollbar::-webkit-scrollbar-thumb:hover {
|
42 |
+
background: #a0aec0;
|
43 |
+
}
|
44 |
+
</style>
|
45 |
+
</head>
|
46 |
+
<body class="bg-gray-50 min-h-screen">
|
47 |
+
<div class="container mx-auto px-4 py-8 max-w-6xl">
|
48 |
+
<header class="mb-8 text-center">
|
49 |
+
<h1 class="text-3xl font-bold text-gray-800 mb-2">LLM Fine-Tuning Data Processor</h1>
|
50 |
+
<p class="text-gray-600">Transform raw text into optimized datasets for language model training</p>
|
51 |
+
</header>
|
52 |
+
|
53 |
+
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
54 |
+
<!-- Input Section -->
|
55 |
+
<div class="lg:col-span-2 bg-white rounded-lg shadow-md overflow-hidden">
|
56 |
+
<div class="p-6 border-b border-gray-200">
|
57 |
+
<h2 class="text-xl font-semibold text-gray-700">Data Input</h2>
|
58 |
+
</div>
|
59 |
+
|
60 |
+
<div class="p-6">
|
61 |
+
<div class="mb-6">
|
62 |
+
<div class="flex justify-between items-center mb-2">
|
63 |
+
<label class="block text-sm font-medium text-gray-700">Input Method</label>
|
64 |
+
</div>
|
65 |
+
<div class="flex space-x-4">
|
66 |
+
<button id="textTab" class="tab-btn active px-4 py-2 rounded-md bg-blue-100 text-blue-700 font-medium">Text Input</button>
|
67 |
+
<button id="fileTab" class="tab-btn px-4 py-2 rounded-md bg-gray-100 text-gray-700 font-medium">File Upload</button>
|
68 |
+
<button id="apiTab" class="tab-btn px-4 py-2 rounded-md bg-gray-100 text-gray-700 font-medium">API Fetch</button>
|
69 |
+
</div>
|
70 |
+
</div>
|
71 |
+
|
72 |
+
<!-- Text Input Panel -->
|
73 |
+
<div id="textPanel" class="input-panel active">
|
74 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Enter your text</label>
|
75 |
+
<textarea id="textInput" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-input" placeholder="Paste your text here or type directly..."></textarea>
|
76 |
+
<div class="flex justify-between items-center mt-2">
|
77 |
+
<span id="charCount" class="text-xs text-gray-500">0 characters</span>
|
78 |
+
<button id="clearTextBtn" class="text-xs text-red-500 hover:text-red-700">Clear</button>
|
79 |
+
</div>
|
80 |
+
</div>
|
81 |
+
|
82 |
+
<!-- File Upload Panel -->
|
83 |
+
<div id="filePanel" class="input-panel hidden">
|
84 |
+
<div id="dropzone" class="dropzone rounded-lg p-8 text-center cursor-pointer mb-4">
|
85 |
+
<i class="fas fa-cloud-upload-alt text-4xl text-gray-400 mb-3"></i>
|
86 |
+
<p class="text-gray-600 mb-1">Drag & drop files here or click to browse</p>
|
87 |
+
<p class="text-xs text-gray-500">Supports: .txt, .csv, .json, .pdf (max 10MB)</p>
|
88 |
+
<input type="file" id="fileInput" class="hidden" multiple accept=".txt,.csv,.json,.pdf">
|
89 |
+
</div>
|
90 |
+
<div id="fileList" class="hidden">
|
91 |
+
<h3 class="text-sm font-medium text-gray-700 mb-2">Selected Files:</h3>
|
92 |
+
<ul id="fileListItems" class="space-y-2"></ul>
|
93 |
+
</div>
|
94 |
+
</div>
|
95 |
+
|
96 |
+
<!-- API Panel -->
|
97 |
+
<div id="apiPanel" class="input-panel hidden">
|
98 |
+
<div class="mb-4">
|
99 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">API Endpoint</label>
|
100 |
+
<input type="text" id="apiUrl" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" placeholder="https://api.example.com/data">
|
101 |
+
</div>
|
102 |
+
<div class="mb-4">
|
103 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Headers (JSON)</label>
|
104 |
+
<textarea id="apiHeaders" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 h-24" placeholder='{"Authorization": "Bearer token"}'></textarea>
|
105 |
+
</div>
|
106 |
+
<button id="fetchApiBtn" class="w-full bg-blue-600 hover:bg-blue-700 text-white py-2 px-4 rounded-lg transition duration-200">
|
107 |
+
Fetch Data
|
108 |
+
</button>
|
109 |
+
</div>
|
110 |
+
</div>
|
111 |
+
</div>
|
112 |
+
|
113 |
+
<!-- Processing Options -->
|
114 |
+
<div class="bg-white rounded-lg shadow-md overflow-hidden">
|
115 |
+
<div class="p-6 border-b border-gray-200">
|
116 |
+
<h2 class="text-xl font-semibold text-gray-700">Processing Options</h2>
|
117 |
+
</div>
|
118 |
+
|
119 |
+
<div class="p-6">
|
120 |
+
<div class="mb-6">
|
121 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Output Format</label>
|
122 |
+
<select id="outputFormat" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
|
123 |
+
<option value="jsonl">JSON Lines (.jsonl)</option>
|
124 |
+
<option value="csv">CSV</option>
|
125 |
+
<option value="txt">Plain Text</option>
|
126 |
+
</select>
|
127 |
+
</div>
|
128 |
+
|
129 |
+
<div class="mb-6">
|
130 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Text Processing</label>
|
131 |
+
<div class="space-y-2">
|
132 |
+
<div class="flex items-center">
|
133 |
+
<input type="checkbox" id="removeEmptyLines" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" checked>
|
134 |
+
<label for="removeEmptyLines" class="ml-2 block text-sm text-gray-700">Remove empty lines</label>
|
135 |
+
</div>
|
136 |
+
<div class="flex items-center">
|
137 |
+
<input type="checkbox" id="normalizeWhitespace" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" checked>
|
138 |
+
<label for="normalizeWhitespace" class="ml-2 block text-sm text-gray-700">Normalize whitespace</label>
|
139 |
+
</div>
|
140 |
+
<div class="flex items-center">
|
141 |
+
<input type="checkbox" id="removeSpecialChars" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
|
142 |
+
<label for="removeSpecialChars" class="ml-2 block text-sm text-gray-700">Remove special characters</label>
|
143 |
+
</div>
|
144 |
+
<div class="flex items-center">
|
145 |
+
<input type="checkbox" id="lowercaseText" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
|
146 |
+
<label for="lowercaseText" class="ml-2 block text-sm text-gray-700">Convert to lowercase</label>
|
147 |
+
</div>
|
148 |
+
</div>
|
149 |
+
</div>
|
150 |
+
|
151 |
+
<div class="mb-6">
|
152 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Chunking</label>
|
153 |
+
<div class="flex items-center space-x-2">
|
154 |
+
<input type="number" id="chunkSize" class="w-20 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" value="512" min="64" max="4096">
|
155 |
+
<span class="text-sm text-gray-700">tokens per chunk</span>
|
156 |
+
</div>
|
157 |
+
<div class="mt-2 flex items-center">
|
158 |
+
<input type="checkbox" id="overlapChunks" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
|
159 |
+
<label for="overlapChunks" class="ml-2 block text-sm text-gray-700">Overlap chunks (25%)</label>
|
160 |
+
</div>
|
161 |
+
</div>
|
162 |
+
|
163 |
+
<div class="mb-6">
|
164 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Metadata</label>
|
165 |
+
<div id="metadataFields" class="space-y-2">
|
166 |
+
<div class="flex items-center space-x-2">
|
167 |
+
<input type="text" placeholder="Key" class="flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
|
168 |
+
<input type="text" placeholder="Value" class="flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
|
169 |
+
<button class="remove-metadata-btn text-red-500 hover:text-red-700">
|
170 |
+
<i class="fas fa-times"></i>
|
171 |
+
</button>
|
172 |
+
</div>
|
173 |
+
</div>
|
174 |
+
<button id="addMetadataBtn" class="mt-2 text-sm text-blue-600 hover:text-blue-800 flex items-center">
|
175 |
+
<i class="fas fa-plus mr-1"></i> Add Metadata Field
|
176 |
+
</button>
|
177 |
+
</div>
|
178 |
+
|
179 |
+
<button id="processBtn" class="w-full bg-blue-600 hover:bg-blue-700 text-white py-3 px-4 rounded-lg font-medium transition duration-200 flex items-center justify-center">
|
180 |
+
<i class="fas fa-cog mr-2"></i> Process Data
|
181 |
+
</button>
|
182 |
+
</div>
|
183 |
+
</div>
|
184 |
+
</div>
|
185 |
+
|
186 |
+
<!-- Results Section -->
|
187 |
+
<div id="resultsSection" class="mt-8 bg-white rounded-lg shadow-md overflow-hidden hidden">
|
188 |
+
<div class="p-6 border-b border-gray-200 flex justify-between items-center">
|
189 |
+
<h2 class="text-xl font-semibold text-gray-700">Processing Results</h2>
|
190 |
+
<div class="flex space-x-2">
|
191 |
+
<button id="downloadBtn" class="bg-green-600 hover:bg-green-700 text-white py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
|
192 |
+
<i class="fas fa-download mr-2"></i> Download
|
193 |
+
</button>
|
194 |
+
<button id="copyBtn" class="bg-gray-200 hover:bg-gray-300 text-gray-800 py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
|
195 |
+
<i class="fas fa-copy mr-2"></i> Copy
|
196 |
+
</button>
|
197 |
+
<button id="clearResultsBtn" class="bg-gray-200 hover:bg-gray-300 text-gray-800 py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
|
198 |
+
<i class="fas fa-trash-alt mr-2"></i> Clear
|
199 |
+
</button>
|
200 |
+
</div>
|
201 |
+
</div>
|
202 |
+
|
203 |
+
<div class="p-6">
|
204 |
+
<div id="statsContainer" class="mb-6 grid grid-cols-1 md:grid-cols-3 gap-4">
|
205 |
+
<div class="bg-blue-50 p-4 rounded-lg">
|
206 |
+
<p class="text-sm text-blue-700 font-medium">Total Chunks</p>
|
207 |
+
<p id="totalChunks" class="text-2xl font-bold text-blue-900">0</p>
|
208 |
+
</div>
|
209 |
+
<div class="bg-green-50 p-4 rounded-lg">
|
210 |
+
<p class="text-sm text-green-700 font-medium">Total Tokens</p>
|
211 |
+
<p id="totalTokens" class="text-2xl font-bold text-green-900">0</p>
|
212 |
+
</div>
|
213 |
+
<div class="bg-purple-50 p-4 rounded-lg">
|
214 |
+
<p class="text-sm text-purple-700 font-medium">Processing Time</p>
|
215 |
+
<p id="processingTime" class="text-2xl font-bold text-purple-900">0ms</p>
|
216 |
+
</div>
|
217 |
+
</div>
|
218 |
+
|
219 |
+
<div class="mb-4">
|
220 |
+
<label class="block text-sm font-medium text-gray-700 mb-2">Preview (first 3 items)</label>
|
221 |
+
<div id="resultsPreview" class="bg-gray-50 p-4 rounded-lg overflow-y-auto max-h-96 custom-scrollbar text-sm font-mono text-gray-800"></div>
|
222 |
+
</div>
|
223 |
+
|
224 |
+
<div id="progressContainer" class="hidden">
|
225 |
+
<div class="flex justify-between mb-1">
|
226 |
+
<span class="text-sm font-medium text-gray-700">Processing</span>
|
227 |
+
<span id="progressPercent" class="text-sm font-medium text-gray-700">0%</span>
|
228 |
+
</div>
|
229 |
+
<div class="w-full bg-gray-200 rounded-full h-2.5">
|
230 |
+
<div id="progressBar" class="progress-bar bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
|
231 |
+
</div>
|
232 |
+
</div>
|
233 |
+
</div>
|
234 |
+
</div>
|
235 |
+
</div>
|
236 |
+
|
237 |
+
<script>
|
238 |
+
document.addEventListener('DOMContentLoaded', function() {
|
239 |
+
// UI element references, looked up once by id and shared by the handlers below.
const byId = (id) => document.getElementById(id);

// Input-method tabs and the panel each one reveals
const textTab = byId('textTab');
const fileTab = byId('fileTab');
const apiTab = byId('apiTab');
const textPanel = byId('textPanel');
const filePanel = byId('filePanel');
const apiPanel = byId('apiPanel');

// Text input panel
const textInput = byId('textInput');
const charCount = byId('charCount');
const clearTextBtn = byId('clearTextBtn');

// File upload panel
const dropzone = byId('dropzone');
const fileInput = byId('fileInput');
const fileList = byId('fileList');
const fileListItems = byId('fileListItems');

// Processing trigger and results area
const processBtn = byId('processBtn');
const resultsSection = byId('resultsSection');
const resultsPreview = byId('resultsPreview');
const totalChunks = byId('totalChunks');
const totalTokens = byId('totalTokens');
const processingTime = byId('processingTime');
const downloadBtn = byId('downloadBtn');
const copyBtn = byId('copyBtn');
const clearResultsBtn = byId('clearResultsBtn');

// Progress indicator
const progressContainer = byId('progressContainer');
const progressBar = byId('progressBar');
const progressPercent = byId('progressPercent');

// Metadata key/value editor
const addMetadataBtn = byId('addMetadataBtn');
const metadataFields = byId('metadataFields');
|
267 |
+
|
268 |
+
// Tab switching

/**
 * Makes one input-method tab and its panel the visible, highlighted pair.
 *
 * Every `.tab-btn` is first put into the gray (inactive) look and every
 * `.input-panel` is hidden; then the given tab gets the blue (active) look
 * and the given panel is shown.
 *
 * @param {Element} activeTab - Tab button to highlight.
 * @param {Element} activePanel - Panel to reveal.
 */
function switchTab(activeTab, activePanel) {
    const inactiveLook = ['bg-gray-100', 'text-gray-700'];
    const activeLook = ['bg-blue-100', 'text-blue-700'];

    // Phase 1: put everything into the inactive / hidden state.
    for (const tabButton of document.querySelectorAll('.tab-btn')) {
        tabButton.classList.remove('active');
        tabButton.classList.add(...inactiveLook);
        tabButton.classList.remove(...activeLook);
    }
    for (const inputPanel of document.querySelectorAll('.input-panel')) {
        inputPanel.classList.add('hidden');
        inputPanel.classList.remove('active');
    }

    // Phase 2: switch on the requested tab and panel.
    activeTab.classList.add('active');
    activeTab.classList.remove(...inactiveLook);
    activeTab.classList.add(...activeLook);

    activePanel.classList.remove('hidden');
    activePanel.classList.add('active');
}

// Wire each tab button to the panel it controls.
const tabPanelPairs = [
    [textTab, textPanel],
    [fileTab, filePanel],
    [apiTab, apiPanel],
];
for (const [tab, panel] of tabPanelPairs) {
    tab.addEventListener('click', () => switchTab(tab, panel));
}
|
294 |
+
|
295 |
+
// Text input handling

// Keep the character counter in sync with the textarea while the user types.
textInput.addEventListener('input', () => {
    const typedLength = textInput.value.length;
    charCount.textContent = `${typedLength} characters`;
});

// "Clear" empties the textarea and resets the counter text.
clearTextBtn.addEventListener('click', () => {
    textInput.value = '';
    charCount.textContent = '0 characters';
});
|
305 |
+
|
306 |
+
// File upload handling

/** Cancels the default action of a drag/drop event and stops it bubbling. */
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}

/** Marks the dropzone as a live drop target (adds the `active` class). */
function highlight() {
    dropzone.classList.add('active');
}

/** Removes the live-drop-target marking from the dropzone. */
function unhighlight() {
    dropzone.classList.remove('active');
}

/** Hands the files carried by a drop event to handleFiles. */
function handleDrop(e) {
    const { files } = e.dataTransfer;
    handleFiles(files);
}

// Clicking anywhere on the dropzone opens the file picker of the hidden input.
dropzone.addEventListener('click', () => {
    fileInput.click();
});

// Registration order matters for 'drop': defaults are cancelled first, then
// the highlight is removed, then the files are processed.
const dragEventTypes = ['dragenter', 'dragover', 'dragleave', 'drop'];
for (const type of dragEventTypes) {
    dropzone.addEventListener(type, preventDefaults, false);
}
for (const type of ['dragenter', 'dragover']) {
    dropzone.addEventListener(type, highlight, false);
}
for (const type of ['dragleave', 'drop']) {
    dropzone.addEventListener(type, unhighlight, false);
}
dropzone.addEventListener('drop', handleDrop, false);

// Files chosen through the picker go through the same path as dropped files.
fileInput.addEventListener('change', () => {
    handleFiles(fileInput.files);
});
|
347 |
+
|
348 |
+
/**
 * Rebuilds the "Selected Files" list from a batch of picked or dropped files.
 *
 * Files are checked against the limits the dropzone advertises
 * (.txt/.csv/.json/.pdf, at most 10MB). Dropped files never pass through the
 * file input's `accept` filter, so the check has to happen here. Rejected
 * files are skipped and reported with console.warn.
 *
 * @param {FileList|File[]} files - Files from the input or a drop event.
 */
function handleFiles(files) {
    const allowedExtensions = ['.txt', '.csv', '.json', '.pdf'];
    const maxFileBytes = 10 * 1024 * 1024;

    if (files.length === 0) return;

    const acceptedFiles = [];
    for (const file of Array.from(files)) {
        const dotIndex = file.name.lastIndexOf('.');
        const extension = dotIndex === -1 ? '' : file.name.slice(dotIndex).toLowerCase();

        if (!allowedExtensions.includes(extension)) {
            console.warn(`Skipping "${file.name}": unsupported file type.`);
            continue;
        }
        if (file.size > maxFileBytes) {
            console.warn(`Skipping "${file.name}": larger than 10MB.`);
            continue;
        }
        acceptedFiles.push(file);
    }

    // The list always reflects only the latest batch.
    fileListItems.innerHTML = '';

    if (acceptedFiles.length === 0) {
        fileList.classList.add('hidden');
        return;
    }
    fileList.classList.remove('hidden');

    for (const file of acceptedFiles) {
        const listItem = document.createElement('li');
        listItem.className = 'flex items-center justify-between bg-gray-50 p-2 rounded';

        const fileInfo = document.createElement('div');
        fileInfo.className = 'flex items-center';

        const icon = document.createElement('i');
        icon.className = 'fas fa-file-alt text-gray-500 mr-2';

        // textContent (not innerHTML) so a file name is never parsed as markup.
        const fileName = document.createElement('span');
        fileName.className = 'text-sm text-gray-700';
        fileName.textContent = file.name;

        const fileSize = document.createElement('span');
        fileSize.className = 'text-xs text-gray-500 ml-2';
        fileSize.textContent = formatFileSize(file.size);

        const removeBtn = document.createElement('button');
        removeBtn.className = 'text-red-500 hover:text-red-700 ml-2';
        removeBtn.innerHTML = '<i class="fas fa-times"></i>';
        removeBtn.addEventListener('click', function() {
            listItem.remove();
            // Hide the whole list once its last row is gone.
            if (fileListItems.children.length === 0) {
                fileList.classList.add('hidden');
            }
        });

        fileInfo.appendChild(icon);
        fileInfo.appendChild(fileName);
        fileInfo.appendChild(fileSize);

        listItem.appendChild(fileInfo);
        listItem.appendChild(removeBtn);

        fileListItems.appendChild(listItem);
    }
}
|
393 |
+
|
394 |
+
/**
 * Formats a byte count as a short human-readable string, e.g. 1536 -> "1.5 KB".
 *
 * The value is divided by 1024 until it drops below 1024 or the largest
 * known unit is reached, so very large sizes stay labelled (in PB) instead
 * of running off the end of the unit table. Division by 1024 is exact for
 * binary floats, which avoids picking the wrong unit at exact boundaries
 * the way a floating-point log ratio can.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Value with at most two decimals plus a unit; "0 Bytes"
 *   for zero, negative or non-finite input.
 */
function formatFileSize(bytes) {
    const units = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
    const step = 1024;

    if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

    let value = bytes;
    let unitIndex = 0;
    while (value >= step && unitIndex < units.length - 1) {
        value /= step;
        unitIndex += 1;
    }

    // parseFloat drops trailing zeros left by toFixed ("1.50" -> 1.5).
    return `${parseFloat(value.toFixed(2))} ${units[unitIndex]}`;
}
|
401 |
+
|
402 |
+
// Metadata fields handling

// One delegated listener removes any metadata row, including the row that is
// present in the initial markup (its button never received its own listener).
metadataFields.addEventListener('click', function(event) {
    // The click may land on the <i> icon inside the button, hence closest().
    const removeBtn = event.target.closest('.remove-metadata-btn');
    if (!removeBtn || !metadataFields.contains(removeBtn)) return;
    removeBtn.parentElement.remove();
});

// Appends an empty Key / Value row with its own remove button.
addMetadataBtn.addEventListener('click', function() {
    const inputClasses = 'flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500';

    const fieldDiv = document.createElement('div');
    fieldDiv.className = 'flex items-center space-x-2 fade-in';

    const keyInput = document.createElement('input');
    keyInput.type = 'text';
    keyInput.placeholder = 'Key';
    keyInput.className = inputClasses;

    const valueInput = document.createElement('input');
    valueInput.type = 'text';
    valueInput.placeholder = 'Value';
    valueInput.className = inputClasses;

    // Removal is handled by the delegated listener on metadataFields above.
    const removeBtn = document.createElement('button');
    removeBtn.className = 'remove-metadata-btn text-red-500 hover:text-red-700';
    removeBtn.innerHTML = '<i class="fas fa-times"></i>';

    fieldDiv.appendChild(keyInput);
    fieldDiv.appendChild(valueInput);
    fieldDiv.appendChild(removeBtn);

    metadataFields.appendChild(fieldDiv);
});
|
430 |
+
|
431 |
+
// Processing function
// Runs the (simulated) processing: disables the button, animates the progress
// bar from 0% to 100% in 5% steps every 100ms, then calls finishProcessing
// after a further 500ms.
processBtn.addEventListener('click', function() {
    // Show loading state
    processBtn.disabled = true;
    processBtn.innerHTML = '<i class="fas fa-spinner fa-spin mr-2"></i> Processing...';

    // Start every run from an empty bar; a previous run leaves it at 100%.
    let progress = 0;
    progressBar.style.width = '0%';
    progressPercent.textContent = '0%';

    // The progress container is nested inside the results section, which is
    // hidden until the first run finishes; reveal both so progress is visible.
    resultsSection.classList.remove('hidden');
    progressContainer.classList.remove('hidden');

    // Simulate processing (in a real app, this would be actual processing)
    const interval = setInterval(() => {
        progress = Math.min(progress + 5, 100);
        progressBar.style.width = `${progress}%`;
        progressPercent.textContent = `${progress}%`;

        if (progress >= 100) {
            clearInterval(interval);
            setTimeout(finishProcessing, 500);
        }
    }, 100);
});
|
453 |
+
|
454 |
+
/**
 * Completes a processing run: hides the progress indicator, fills the results
 * area with generated sample data for the selected output format, reveals and
 * scrolls to the results, re-enables the process button, and binds the
 * download/copy buttons to the new data.
 */
function finishProcessing() {
    progressContainer.classList.add('hidden');

    // Sample results only (in a real app, this would be actual processed data)
    const selectedFormat = document.getElementById('outputFormat').value;
    const generated = generateSampleData(selectedFormat);
    const { stats } = generated;

    // Populate the preview pane and the three stat tiles.
    resultsPreview.textContent = generated.preview;
    totalChunks.textContent = stats.chunks;
    totalTokens.textContent = stats.tokens;
    processingTime.textContent = `${stats.time}ms`;

    // Reveal the results and bring them into view.
    resultsSection.classList.remove('hidden');
    resultsSection.scrollIntoView({ behavior: 'smooth' });

    // Put the process button back into its idle state.
    processBtn.disabled = false;
    processBtn.innerHTML = '<i class="fas fa-cog mr-2"></i> Process Data';

    // Point the download and copy buttons at the full data set.
    setupDownloadAndCopy(generated.fullData, selectedFormat);
}
|
479 |
+
|
480 |
+
/**
 * Generates placeholder output in the requested format.
 *
 * Random chunk/token/time statistics are drawn first, then one record per
 * chunk is created. Both the full data and the preview are rendered from that
 * same record list, so the preview really shows the first three items of the
 * data that will be downloaded or copied.
 *
 * @param {string} format - 'jsonl', 'csv', or anything else for plain text.
 * @returns {{preview: string, fullData: string,
 *            stats: {chunks: number, tokens: number, time: number}}}
 *   `preview` holds the first three records (pretty-printed for jsonl),
 *   `fullData` holds every record.
 */
function generateSampleData(format) {
    const previewCount = 3;

    const chunks = Math.floor(Math.random() * 50) + 10;
    const tokens = chunks * (Math.floor(Math.random() * 200) + 300);
    const time = Math.floor(Math.random() * 2000) + 500;

    // One record per chunk; every rendering below reads from this list.
    const records = Array.from({ length: chunks }, (_, i) => ({
        index: i + 1,
        length: Math.floor(Math.random() * 100) + 50,
    }));
    const previewRecords = records.slice(0, previewCount);

    let preview;
    let fullData;

    if (format === 'jsonl') {
        const toObject = (record) => ({
            text: `This is sample text chunk ${record.index} for fine-tuning your LLM. It contains natural language that would be useful for training.`,
            metadata: {
                source: "sample",
                length: record.length
            }
        });
        // Preview is pretty-printed for reading; full data is one object per line.
        preview = previewRecords.map((record) => JSON.stringify(toObject(record), null, 2)).join('\n\n');
        fullData = records.map((record) => JSON.stringify(toObject(record))).join('\n');
    } else if (format === 'csv') {
        const header = 'text,source,length';
        const toRow = (record) => `"This is sample text chunk ${record.index} for fine-tuning your LLM.","sample",${record.length}`;
        preview = [header, ...previewRecords.map(toRow)].join('\n');
        fullData = [header, ...records.map(toRow)].join('\n');
    } else {
        const toParagraph = (record) => `This is sample text chunk ${record.index} for fine-tuning your LLM.`;
        preview = previewRecords.map(toParagraph).join('\n\n');
        fullData = records.map(toParagraph).join('\n\n');
    }

    return {
        preview: preview,
        fullData: fullData,
        stats: {
            chunks: chunks,
            tokens: tokens,
            time: time
        }
    };
}
|
541 |
+
|
542 |
+
/**
 * Binds the Download and Copy buttons to a freshly generated data set.
 *
 * Handlers are assigned through `onclick`, so each call replaces the handlers
 * installed by the previous call.
 *
 * @param {string} data - Full output text to download or copy.
 * @param {string} format - 'jsonl', 'csv', or anything else for plain text;
 *   selects the MIME type and file extension of the download.
 */
function setupDownloadAndCopy(data, format) {
    let mimeType, extension;

    switch(format) {
        case 'jsonl':
            mimeType = 'application/json';
            extension = 'jsonl';
            break;
        case 'csv':
            mimeType = 'text/csv';
            extension = 'csv';
            break;
        default:
            mimeType = 'text/plain';
            extension = 'txt';
    }

    // Download button: serve the data through a temporary object URL.
    downloadBtn.onclick = function() {
        const blob = new Blob([data], { type: mimeType });
        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = `llm_dataset_${new Date().toISOString().slice(0,10)}.${extension}`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);
    };

    // Copy button. The idle label is a fixed string rather than whatever the
    // button shows at click time: reading it back during the "Copied!" window
    // would make a quick second click restore "Copied!" permanently.
    const idleCopyLabel = '<i class="fas fa-copy mr-2"></i> Copy';
    let restoreTimer = null;

    copyBtn.onclick = function() {
        // The Clipboard API is missing in insecure contexts and older browsers.
        if (!navigator.clipboard) {
            console.error('Clipboard API is not available; cannot copy results.');
            return;
        }

        navigator.clipboard.writeText(data).then(() => {
            copyBtn.innerHTML = '<i class="fas fa-check mr-2"></i> Copied!';
            // Restart the 2s window if the user clicks again before it ends.
            if (restoreTimer !== null) {
                clearTimeout(restoreTimer);
            }
            restoreTimer = setTimeout(() => {
                copyBtn.innerHTML = idleCopyLabel;
                restoreTimer = null;
            }, 2000);
        }).catch((err) => {
            console.error('Failed to copy results to the clipboard:', err);
        });
    };
}
|
583 |
+
|
584 |
+
// Clear results: the section is only hidden; its contents are left in place.
clearResultsBtn.addEventListener('click', () => {
    resultsSection.classList.add('hidden');
});
|
588 |
+
});
|
589 |
+
</script>
|
590 |
+
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - <a href="https://enzostvs-deepsite.hf.space?remix=fhsp93/nocodeapprtest" style="color: #fff;text-decoration: underline;" target="_blank" >🧬 Remix</a></p></body>
|
591 |
+
</html>
|