<!--
nocodeapprtest / index.html
fhsp93's picture
Add 2 files
e91a8d9 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Fine-Tuning Data Processor</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
.dropzone {
border: 2px dashed #cbd5e0;
transition: all 0.3s ease;
}
.dropzone.active {
border-color: #4299e1;
background-color: #ebf8ff;
}
.progress-bar {
transition: width 0.3s ease;
}
.text-input {
min-height: 150px;
}
.fade-in {
animation: fadeIn 0.3s ease-in;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
.custom-scrollbar::-webkit-scrollbar {
width: 8px;
}
.custom-scrollbar::-webkit-scrollbar-track {
background: #f1f1f1;
}
.custom-scrollbar::-webkit-scrollbar-thumb {
background: #cbd5e0;
border-radius: 4px;
}
.custom-scrollbar::-webkit-scrollbar-thumb:hover {
background: #a0aec0;
}
</style>
</head>
<body class="bg-gray-50 min-h-screen">
<div class="container mx-auto px-4 py-8 max-w-6xl">
<header class="mb-8 text-center">
<h1 class="text-3xl font-bold text-gray-800 mb-2">LLM Fine-Tuning Data Processor</h1>
<p class="text-gray-600">Transform raw text into optimized datasets for language model training</p>
</header>
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
<!-- Input Section -->
<div class="lg:col-span-2 bg-white rounded-lg shadow-md overflow-hidden">
<div class="p-6 border-b border-gray-200">
<h2 class="text-xl font-semibold text-gray-700">Data Input</h2>
</div>
<div class="p-6">
<div class="mb-6">
<div class="flex justify-between items-center mb-2">
<label class="block text-sm font-medium text-gray-700">Input Method</label>
</div>
<div class="flex space-x-4">
<button id="textTab" class="tab-btn active px-4 py-2 rounded-md bg-blue-100 text-blue-700 font-medium">Text Input</button>
<button id="fileTab" class="tab-btn px-4 py-2 rounded-md bg-gray-100 text-gray-700 font-medium">File Upload</button>
<button id="apiTab" class="tab-btn px-4 py-2 rounded-md bg-gray-100 text-gray-700 font-medium">API Fetch</button>
</div>
</div>
<!-- Text Input Panel: free-form text entry with a live character counter -->
<div id="textPanel" class="input-panel active">
  <!-- "for" ties the visible label to the textarea so clicking it focuses the field and assistive tech announces it -->
  <label for="textInput" class="block text-sm font-medium text-gray-700 mb-2">Enter your text</label>
  <textarea id="textInput" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 text-input" placeholder="Paste your text here or type directly..."></textarea>
  <div class="flex justify-between items-center mt-2">
    <span id="charCount" class="text-xs text-gray-500">0 characters</span>
    <button id="clearTextBtn" type="button" class="text-xs text-red-500 hover:text-red-700">Clear</button>
  </div>
</div>
<!-- File Upload Panel -->
<div id="filePanel" class="input-panel hidden">
<div id="dropzone" class="dropzone rounded-lg p-8 text-center cursor-pointer mb-4">
<i class="fas fa-cloud-upload-alt text-4xl text-gray-400 mb-3"></i>
<p class="text-gray-600 mb-1">Drag & drop files here or click to browse</p>
<p class="text-xs text-gray-500">Supports: .txt, .csv, .json, .pdf (max 10MB)</p>
<input type="file" id="fileInput" class="hidden" multiple accept=".txt,.csv,.json,.pdf">
</div>
<div id="fileList" class="hidden">
<h3 class="text-sm font-medium text-gray-700 mb-2">Selected Files:</h3>
<ul id="fileListItems" class="space-y-2"></ul>
</div>
</div>
<!-- API Panel: endpoint URL plus optional request headers entered as JSON -->
<div id="apiPanel" class="input-panel hidden">
  <div class="mb-4">
    <!-- Labels are bound to their controls with "for" so they are announced and clickable -->
    <label for="apiUrl" class="block text-sm font-medium text-gray-700 mb-2">API Endpoint</label>
    <input type="url" id="apiUrl" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" placeholder="https://api.example.com/data">
  </div>
  <div class="mb-4">
    <label for="apiHeaders" class="block text-sm font-medium text-gray-700 mb-2">Headers (JSON)</label>
    <textarea id="apiHeaders" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 h-24" placeholder='{"Authorization": "Bearer token"}'></textarea>
  </div>
  <button id="fetchApiBtn" type="button" class="w-full bg-blue-600 hover:bg-blue-700 text-white py-2 px-4 rounded-lg transition duration-200">
    Fetch Data
  </button>
</div>
</div>
</div>
<!-- Processing Options -->
<div class="bg-white rounded-lg shadow-md overflow-hidden">
<div class="p-6 border-b border-gray-200">
<h2 class="text-xl font-semibold text-gray-700">Processing Options</h2>
</div>
<div class="p-6">
<!-- Output format selector; the chosen value is read by the script when results are generated -->
<div class="mb-6">
  <label for="outputFormat" class="block text-sm font-medium text-gray-700 mb-2">Output Format</label>
  <select id="outputFormat" class="w-full px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
    <option value="jsonl">JSON Lines (.jsonl)</option>
    <option value="csv">CSV</option>
    <option value="txt">Plain Text</option>
  </select>
</div>
<div class="mb-6">
<label class="block text-sm font-medium text-gray-700 mb-2">Text Processing</label>
<div class="space-y-2">
<div class="flex items-center">
<input type="checkbox" id="removeEmptyLines" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" checked>
<label for="removeEmptyLines" class="ml-2 block text-sm text-gray-700">Remove empty lines</label>
</div>
<div class="flex items-center">
<input type="checkbox" id="normalizeWhitespace" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded" checked>
<label for="normalizeWhitespace" class="ml-2 block text-sm text-gray-700">Normalize whitespace</label>
</div>
<div class="flex items-center">
<input type="checkbox" id="removeSpecialChars" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
<label for="removeSpecialChars" class="ml-2 block text-sm text-gray-700">Remove special characters</label>
</div>
<div class="flex items-center">
<input type="checkbox" id="lowercaseText" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
<label for="lowercaseText" class="ml-2 block text-sm text-gray-700">Convert to lowercase</label>
</div>
</div>
</div>
<!-- Chunking options: chunk size in tokens and an optional 25% overlap toggle -->
<div class="mb-6">
  <!-- The group label doubles as the label of the numeric field, so it is bound to it with "for" -->
  <label for="chunkSize" class="block text-sm font-medium text-gray-700 mb-2">Chunking</label>
  <div class="flex items-center space-x-2">
    <input type="number" id="chunkSize" class="w-20 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500" value="512" min="64" max="4096">
    <span class="text-sm text-gray-700">tokens per chunk</span>
  </div>
  <div class="mt-2 flex items-center">
    <input type="checkbox" id="overlapChunks" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
    <label for="overlapChunks" class="ml-2 block text-sm text-gray-700">Overlap chunks (25%)</label>
  </div>
</div>
<!-- Metadata: a list of key/value rows; further rows are appended to #metadataFields by script -->
<div class="mb-6">
  <label class="block text-sm font-medium text-gray-700 mb-2">Metadata</label>
  <div id="metadataFields" class="space-y-2">
    <div class="flex items-center space-x-2">
      <!-- Placeholders are not announced reliably as labels, so each field carries an explicit accessible name -->
      <input type="text" placeholder="Key" aria-label="Metadata key" class="flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
      <input type="text" placeholder="Value" aria-label="Metadata value" class="flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
      <!-- Icon-only button: the name comes from aria-label, the glyph itself is hidden from assistive tech -->
      <button type="button" class="remove-metadata-btn text-red-500 hover:text-red-700" aria-label="Remove metadata field">
        <i class="fas fa-times" aria-hidden="true"></i>
      </button>
    </div>
  </div>
  <button id="addMetadataBtn" type="button" class="mt-2 text-sm text-blue-600 hover:text-blue-800 flex items-center">
    <i class="fas fa-plus mr-1" aria-hidden="true"></i> Add Metadata Field
  </button>
</div>
<button id="processBtn" class="w-full bg-blue-600 hover:bg-blue-700 text-white py-3 px-4 rounded-lg font-medium transition duration-200 flex items-center justify-center">
<i class="fas fa-cog mr-2"></i> Process Data
</button>
</div>
</div>
</div>
<!-- Results Section -->
<div id="resultsSection" class="mt-8 bg-white rounded-lg shadow-md overflow-hidden hidden">
<div class="p-6 border-b border-gray-200 flex justify-between items-center">
<h2 class="text-xl font-semibold text-gray-700">Processing Results</h2>
<div class="flex space-x-2">
<button id="downloadBtn" class="bg-green-600 hover:bg-green-700 text-white py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
<i class="fas fa-download mr-2"></i> Download
</button>
<button id="copyBtn" class="bg-gray-200 hover:bg-gray-300 text-gray-800 py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
<i class="fas fa-copy mr-2"></i> Copy
</button>
<button id="clearResultsBtn" class="bg-gray-200 hover:bg-gray-300 text-gray-800 py-2 px-4 rounded-lg text-sm transition duration-200 flex items-center">
<i class="fas fa-trash-alt mr-2"></i> Clear
</button>
</div>
</div>
<div class="p-6">
<div id="statsContainer" class="mb-6 grid grid-cols-1 md:grid-cols-3 gap-4">
<div class="bg-blue-50 p-4 rounded-lg">
<p class="text-sm text-blue-700 font-medium">Total Chunks</p>
<p id="totalChunks" class="text-2xl font-bold text-blue-900">0</p>
</div>
<div class="bg-green-50 p-4 rounded-lg">
<p class="text-sm text-green-700 font-medium">Total Tokens</p>
<p id="totalTokens" class="text-2xl font-bold text-green-900">0</p>
</div>
<div class="bg-purple-50 p-4 rounded-lg">
<p class="text-sm text-purple-700 font-medium">Processing Time</p>
<p id="processingTime" class="text-2xl font-bold text-purple-900">0ms</p>
</div>
</div>
<!-- Preview of the generated output -->
<div class="mb-4">
  <label class="block text-sm font-medium text-gray-700 mb-2">Preview (first 3 items)</label>
  <!-- The script fills this element via textContent with multi-line text; whitespace-pre-wrap keeps
       the line breaks and JSON indentation, which a plain div would collapse into one run of text -->
  <div id="resultsPreview" class="bg-gray-50 p-4 rounded-lg overflow-y-auto max-h-96 custom-scrollbar text-sm font-mono text-gray-800 whitespace-pre-wrap"></div>
</div>
<div id="progressContainer" class="hidden">
<div class="flex justify-between mb-1">
<span class="text-sm font-medium text-gray-700">Processing</span>
<span id="progressPercent" class="text-sm font-medium text-gray-700">0%</span>
</div>
<div class="w-full bg-gray-200 rounded-full h-2.5">
<div id="progressBar" class="progress-bar bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
</div>
</div>
</div>
</div>
</div>
<script>
document.addEventListener('DOMContentLoaded', function() {
// UI Elements
// Shorthand for the id lookups below; every reference is resolved once, up front.
const byId = (id) => document.getElementById(id);

// Input-method tabs and the panels they reveal
const textTab = byId('textTab');
const fileTab = byId('fileTab');
const apiTab = byId('apiTab');
const textPanel = byId('textPanel');
const filePanel = byId('filePanel');
const apiPanel = byId('apiPanel');

// Text input panel
const textInput = byId('textInput');
const charCount = byId('charCount');
const clearTextBtn = byId('clearTextBtn');

// File upload panel
const dropzone = byId('dropzone');
const fileInput = byId('fileInput');
const fileList = byId('fileList');
const fileListItems = byId('fileListItems');

// Processing trigger and results area
const processBtn = byId('processBtn');
const resultsSection = byId('resultsSection');
const resultsPreview = byId('resultsPreview');
const totalChunks = byId('totalChunks');
const totalTokens = byId('totalTokens');
const processingTime = byId('processingTime');
const downloadBtn = byId('downloadBtn');
const copyBtn = byId('copyBtn');
const clearResultsBtn = byId('clearResultsBtn');

// Progress indicator
const progressContainer = byId('progressContainer');
const progressBar = byId('progressBar');
const progressPercent = byId('progressPercent');

// Metadata editor
const addMetadataBtn = byId('addMetadataBtn');
const metadataFields = byId('metadataFields');
// Tab switching
// Make `activeTab` / `activePanel` the selected input method.
// Every .tab-btn is first put into the idle (gray) look and every .input-panel hidden;
// then the requested pair is switched to the selected (blue) look and shown.
function switchTab(activeTab, activePanel) {
  const selectedLook = ['bg-blue-100', 'text-blue-700'];
  const idleLook = ['bg-gray-100', 'text-gray-700'];

  // Phase 1: reset everything
  for (const tab of document.querySelectorAll('.tab-btn')) {
    tab.classList.remove('active');
    tab.classList.add(...idleLook);
    tab.classList.remove(...selectedLook);
  }
  for (const panel of document.querySelectorAll('.input-panel')) {
    panel.classList.add('hidden');
    panel.classList.remove('active');
  }

  // Phase 2: select the requested tab and reveal its panel
  activeTab.classList.add('active');
  activeTab.classList.remove(...idleLook);
  activeTab.classList.add(...selectedLook);
  activePanel.classList.remove('hidden');
  activePanel.classList.add('active');
}
// Each tab button reveals its own panel when clicked.
const tabBindings = [
  [textTab, textPanel],
  [fileTab, filePanel],
  [apiTab, apiPanel]
];
tabBindings.forEach(([tab, panel]) => {
  tab.addEventListener('click', () => switchTab(tab, panel));
});

// Text input handling: keep the character counter in step with the textarea.
const showCharCount = (count) => {
  charCount.textContent = `${count} characters`;
};
textInput.addEventListener('input', () => {
  showCharCount(textInput.value.length);
});
clearTextBtn.addEventListener('click', () => {
  textInput.value = '';
  showCharCount(0);
});
// File upload handling
// Stop the browser's own handling of the drag event (e.g. opening a dropped file)
// and keep the event from reaching ancestors.
function preventDefaults(e) {
  e.preventDefault();
  e.stopPropagation();
}

// Visual cue while something is being dragged over the dropzone.
function highlight() {
  dropzone.classList.add('active');
}

function unhighlight() {
  dropzone.classList.remove('active');
}

// Pass the dropped files on to the shared file-list renderer.
function handleDrop(e) {
  handleFiles(e.dataTransfer.files);
}

// Clicking anywhere in the dropzone opens the hidden native file picker.
dropzone.addEventListener('click', () => {
  fileInput.click();
});

// Handlers per drag event, listed in the order they must run.
const dragHandlers = {
  dragenter: [preventDefaults, highlight],
  dragover: [preventDefaults, highlight],
  dragleave: [preventDefaults, unhighlight],
  drop: [preventDefaults, unhighlight, handleDrop]
};
Object.keys(dragHandlers).forEach((eventName) => {
  dragHandlers[eventName].forEach((handler) => {
    dropzone.addEventListener(eventName, handler, false);
  });
});

// Files chosen through the picker take the same path as dropped files.
fileInput.addEventListener('change', () => {
  handleFiles(fileInput.files);
});
// Render the given files into the "Selected Files" list, replacing whatever was listed before.
//
// files: a FileList (or any array-like of File objects) from the picker or a drop.
//
// The dropzone advertises ".txt, .csv, .json, .pdf (max 10MB)", but the input's `accept`
// attribute only filters the picker dialog - dropped files bypass it, and nothing checks size.
// Files that break either limit are therefore still listed, but flagged as skipped.
function handleFiles(files) {
  if (files.length === 0) return;

  const MAX_FILE_BYTES = 10 * 1024 * 1024;
  const ALLOWED_EXTENSIONS = ['.txt', '.csv', '.json', '.pdf'];

  // Returns a short reason when the file violates an advertised limit, otherwise null.
  function rejectionReason(file) {
    const dot = file.name.lastIndexOf('.');
    const extension = dot === -1 ? '' : file.name.slice(dot).toLowerCase();
    if (!ALLOWED_EXTENSIONS.includes(extension)) return 'unsupported file type';
    if (file.size > MAX_FILE_BYTES) return 'larger than 10MB';
    return null;
  }

  fileList.classList.remove('hidden');
  fileListItems.innerHTML = '';

  Array.from(files).forEach(function(file) {
    const listItem = document.createElement('li');
    listItem.className = 'flex items-center justify-between bg-gray-50 p-2 rounded';

    const fileInfo = document.createElement('div');
    fileInfo.className = 'flex items-center';

    const icon = document.createElement('i');
    icon.className = 'fas fa-file-alt text-gray-500 mr-2';
    icon.setAttribute('aria-hidden', 'true');

    // textContent (not innerHTML) so a file name containing markup is shown literally.
    const fileName = document.createElement('span');
    fileName.className = 'text-sm text-gray-700';
    fileName.textContent = file.name;

    const fileSize = document.createElement('span');
    fileSize.className = 'text-xs text-gray-500 ml-2';
    fileSize.textContent = formatFileSize(file.size);

    fileInfo.appendChild(icon);
    fileInfo.appendChild(fileName);
    fileInfo.appendChild(fileSize);

    const reason = rejectionReason(file);
    if (reason) {
      const note = document.createElement('span');
      note.className = 'text-xs text-red-600 ml-2';
      note.textContent = `Skipped: ${reason}`;
      fileInfo.appendChild(note);
    }

    // Icon-only button, so it needs an explicit accessible name.
    const removeBtn = document.createElement('button');
    removeBtn.type = 'button';
    removeBtn.className = 'text-red-500 hover:text-red-700 ml-2';
    removeBtn.setAttribute('aria-label', `Remove ${file.name}`);
    removeBtn.innerHTML = '<i class="fas fa-times" aria-hidden="true"></i>';
    removeBtn.addEventListener('click', function() {
      listItem.remove();
      // Hide the whole "Selected Files" section once the last entry is gone.
      if (fileListItems.children.length === 0) {
        fileList.classList.add('hidden');
      }
    });

    listItem.appendChild(fileInfo);
    listItem.appendChild(removeBtn);
    fileListItems.appendChild(listItem);
  });
}
// Format a byte count as a short human-readable string using 1024-based units,
// e.g. 1536 -> "1.5 KB". At most two decimals are kept and trailing zeros are dropped.
//
// bytes: number of bytes.
// Returns "0 Bytes" for zero, negative, or non-finite input.
function formatFileSize(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';
  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
  const exponent = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp into the unit table: values below one byte would give a negative index and
  // values past the largest unit would run off the end, both yielding "undefined".
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
// Metadata fields handling
// "Add Metadata Field": append one more key/value row to the metadata list.
addMetadataBtn.addEventListener('click', function() {
  const inputClasses = 'flex-1 px-3 py-2 text-gray-700 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500';

  const fieldDiv = document.createElement('div');
  fieldDiv.className = 'flex items-center space-x-2 fade-in';

  const keyInput = document.createElement('input');
  keyInput.type = 'text';
  keyInput.placeholder = 'Key';
  keyInput.className = inputClasses;

  const valueInput = document.createElement('input');
  valueInput.type = 'text';
  valueInput.placeholder = 'Value';
  valueInput.className = inputClasses;

  // Icon-only button, so it gets an explicit accessible name.
  const removeBtn = document.createElement('button');
  removeBtn.type = 'button';
  removeBtn.className = 'remove-metadata-btn text-red-500 hover:text-red-700';
  removeBtn.setAttribute('aria-label', 'Remove metadata field');
  removeBtn.innerHTML = '<i class="fas fa-times" aria-hidden="true"></i>';

  fieldDiv.appendChild(keyInput);
  fieldDiv.appendChild(valueInput);
  fieldDiv.appendChild(removeBtn);
  metadataFields.appendChild(fieldDiv);
});

// Row removal is delegated to the container instead of being attached per row.
// The row that ships in the page markup also carries a .remove-metadata-btn but never
// received a listener, so its button did nothing; delegation covers static and added rows alike.
metadataFields.addEventListener('click', function(e) {
  // The click usually lands on the <i> icon inside the button, hence closest().
  const removeBtn = e.target.closest('.remove-metadata-btn');
  if (!removeBtn || !metadataFields.contains(removeBtn)) return;
  const row = removeBtn.parentElement;
  if (row) row.remove();
});
// Processing function
// "Process Data": lock the button, run the (simulated) progress animation, then publish results.
// The handler was declared async but never awaited anything, so it is a plain function now.
processBtn.addEventListener('click', function() {
  // Show loading state
  processBtn.disabled = true;
  processBtn.innerHTML = '<i class="fas fa-spinner fa-spin mr-2"></i> Processing...';

  let progress = 0;
  const renderProgress = function() {
    progressBar.style.width = `${progress}%`;
    progressPercent.textContent = `${progress}%`;
  };

  // Start from 0% - otherwise a re-run briefly shows the previous run's 100% until the first tick.
  renderProgress();

  // The progress container is nested inside the results section, which stays hidden until a run
  // has finished (and again after "Clear"). Reveal the section first so the bar is actually rendered.
  resultsSection.classList.remove('hidden');
  progressContainer.classList.remove('hidden');

  // Simulate processing (in a real app, this would be actual processing):
  // +5% every 100 ms, then a short pause at 100% before the results are shown.
  const interval = setInterval(() => {
    progress += 5;
    renderProgress();
    if (progress >= 100) {
      clearInterval(interval);
      setTimeout(() => {
        finishProcessing();
      }, 500);
    }
  }, 100);
});
// Runs once the progress animation completes: hides the progress bar, fills the results
// card with generated sample output, reveals it, and re-enables the Process button.
function finishProcessing() {
  progressContainer.classList.add('hidden');

  // Sample output only (a real app would use the actual processed data here).
  const outputFormat = document.getElementById('outputFormat').value;
  const { preview, fullData, stats } = generateSampleData(outputFormat);

  // Preview text and the three stat tiles
  resultsPreview.textContent = preview;
  totalChunks.textContent = stats.chunks;
  totalTokens.textContent = stats.tokens;
  processingTime.textContent = `${stats.time}ms`;

  // Reveal the results card and bring it into view
  resultsSection.classList.remove('hidden');
  resultsSection.scrollIntoView({ behavior: 'smooth' });

  // Put the Process button back into its idle state
  processBtn.disabled = false;
  processBtn.innerHTML = '<i class="fas fa-cog mr-2"></i> Process Data';

  // Point Download / Copy at the freshly generated data
  setupDownloadAndCopy(fullData, outputFormat);
}
// Build placeholder output for the results card (in a real app this would be processed data).
//
// format: 'jsonl' or 'csv'; any other value produces plain text.
// Returns { preview, fullData, stats: { chunks, tokens, time } } where
//   - fullData holds `chunks` records (10-59, chosen at random) serialized in `format`,
//   - preview shows the FIRST THREE of those same records (pretty-printed for jsonl),
//   - tokens and time are random figures for the stat tiles.
//
// The records are generated once and both strings are derived from them. Previously the
// preview was generated separately with its own random lengths, so it did not match the
// first three items of the downloadable data.
function generateSampleData(format) {
  const PREVIEW_COUNT = 3;
  const chunks = Math.floor(Math.random() * 50) + 10;
  const tokens = chunks * (Math.floor(Math.random() * 200) + 300);
  const time = Math.floor(Math.random() * 2000) + 500;

  const records = Array.from({ length: chunks }, (_, i) => ({
    index: i + 1,
    length: Math.floor(Math.random() * 100) + 50
  }));
  const head = records.slice(0, PREVIEW_COUNT);

  const shortText = (n) => `This is sample text chunk ${n} for fine-tuning your LLM.`;

  let preview;
  let fullData;
  if (format === 'jsonl') {
    const toObject = (r) => ({
      text: `${shortText(r.index)} It contains natural language that would be useful for training.`,
      metadata: {
        source: "sample",
        length: r.length
      }
    });
    // Preview is indented for readability; the data itself is one compact object per line.
    preview = head.map((r) => JSON.stringify(toObject(r), null, 2)).join('\n\n');
    fullData = records.map((r) => JSON.stringify(toObject(r))).join('\n');
  } else if (format === 'csv') {
    const header = 'text,source,length';
    const toRow = (r) => `"${shortText(r.index)}","sample",${r.length}`;
    preview = [header, ...head.map(toRow)].join('\n');
    fullData = [header, ...records.map(toRow)].join('\n');
  } else {
    preview = head.map((r) => shortText(r.index)).join('\n\n');
    fullData = records.map((r) => shortText(r.index)).join('\n\n');
  }

  return {
    preview: preview,
    fullData: fullData,
    stats: {
      chunks: chunks,
      tokens: tokens,
      time: time
    }
  };
}
// (Re)bind the Download and Copy buttons to the given result data.
//
// data:   the full output text to download or copy.
// format: 'jsonl' or 'csv'; anything else is treated as plain text.
//
// Handlers are assigned through .onclick so that each call replaces the previous
// run's handlers instead of stacking a new listener on top of them.
function setupDownloadAndCopy(data, format) {
  let mimeType, extension;
  switch(format) {
    case 'jsonl':
      mimeType = 'application/json';
      extension = 'jsonl';
      break;
    case 'csv':
      mimeType = 'text/csv';
      extension = 'csv';
      break;
    default:
      mimeType = 'text/plain';
      extension = 'txt';
  }

  // Download button: hand the data to the browser as a dated file via a temporary object URL.
  downloadBtn.onclick = function() {
    const blob = new Blob([data], { type: mimeType });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = `llm_dataset_${new Date().toISOString().slice(0,10)}.${extension}`;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    // Release the URL on the next task rather than immediately, so the download
    // triggered by click() has been picked up before the URL becomes invalid.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  };

  // Copy button.
  // The idle label is a fixed string: reading it back from the button at click time meant a
  // second click within the 2 s feedback window captured "Copied!" as the label to restore,
  // leaving the button stuck on "Copied!".
  const COPY_LABEL = '<i class="fas fa-copy mr-2"></i> Copy';
  let resetTimer = null;
  const showFeedback = function(markup) {
    copyBtn.innerHTML = markup;
    clearTimeout(resetTimer);
    resetTimer = setTimeout(() => {
      copyBtn.innerHTML = COPY_LABEL;
    }, 2000);
  };
  const onCopied = () => showFeedback('<i class="fas fa-check mr-2"></i> Copied!');
  const onCopyFailed = () => showFeedback('<i class="fas fa-times mr-2"></i> Copy failed');

  copyBtn.onclick = function() {
    // navigator.clipboard is undefined outside secure contexts, and writeText() rejects
    // when permission is denied; report both instead of throwing or failing silently.
    if (!navigator.clipboard || !navigator.clipboard.writeText) {
      onCopyFailed();
      return;
    }
    navigator.clipboard.writeText(data).then(onCopied, onCopyFailed);
  };
}
// Clear results
// "Clear" only hides the results card; its contents are overwritten by the next run.
clearResultsBtn.addEventListener('click', () => {
  resultsSection.classList.add('hidden');
});
});
</script>
<!-- Attribution badge. Links that open a new tab carry rel="noopener" so the opened page gets no
     handle on this window; the logo declares its size so layout space is reserved before it loads. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">DeepSite</a> - <a href="https://enzostvs-deepsite.hf.space?remix=fhsp93/nocodeapprtest" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">🧬 Remix</a></p>
</body>
</html>