<!doctype html>
<!-- Hugging Face Space (status: Running) -->
<html lang="es">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Preguntas y Respuestas con DistilBERT Multilingual</title>
  <style>
    /* Full-width box used to show the extracted text.
       border-box keeps the 100% width from overflowing once padding and
       border are added. */
    textarea {
      box-sizing: border-box;
      width: 100%;
      height: 300px;
      margin-top: 10px;
    }
  </style>
</head>
<body>
  <h1>Preguntas sobre un Documento</h1>

  <!-- File upload: several PDF/TXT/DOCX files can be selected at once -->
  <p>
    <label for="fileInput">Archivos (PDF, TXT o DOCX):</label>
    <input id="fileInput" type="file" accept=".pdf,.txt,.docx" multiple>
    <button type="button" onclick="processFiles()">Cargar y Analizar Archivos</button>
  </p>

  <!-- Question field and the button that sends it -->
  <p>
    <label for="question">Pregunta:</label>
    <input id="question" type="text" placeholder="Escribe tu pregunta aquí">
    <button type="button" onclick="askQuestion()">Enviar Pregunta</button>
  </p>

  <!-- Read-only view of the text extracted from the uploaded files -->
  <h2 id="extractedTextHeading">Texto Extraído:</h2>
  <textarea id="extractedText" aria-labelledby="extractedTextHeading" readonly></textarea>

  <!-- Answer area; aria-live lets assistive technology announce the answer when it is filled in -->
  <h2>Respuesta:</h2>
  <div id="response" aria-live="polite"></div>

  <!-- PDF.js: used to read text out of PDF files -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
  <!-- JSZip: used to open DOCX archives.
       NOTE(review): the version in the original URL was lost to e-mail obfuscation;
       3.10.1 is pinned here. Confirm it is the intended release. -->
  <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
<script>
// Text gathered from the uploaded files. processFiles() resets it, the
// extractTextFrom* helpers append to it, and askQuestion() sends it as context.
let extractedText = '';
// Set to true by checkModelReady() when the endpoint does not report that the
// model is still loading.
let modelReady = false;
/**
 * Reads every file selected in #fileInput, hands each one to the extractor that
 * matches its extension (pdf, txt, docx) and, once all of them have finished,
 * shows the combined text in the #extractedText textarea.
 *
 * The extractors append to the global `extractedText` themselves, so the order
 * of the combined text follows the order in which the extractions complete.
 *
 * A file with an unsupported extension, a failed or aborted read, or an
 * extractor that throws rejects the batch, which is reported through the
 * error alert below.
 */
function processFiles() {
const fileInput = document.getElementById('fileInput');
const files = Array.from(fileInput.files);
if (files.length === 0) {
alert('Por favor, selecciona al menos un archivo.');
return;
}

extractedText = ''; // Start every batch from an empty buffer.

// Extension -> extractor. Each extractor receives (ArrayBuffer, resolve).
const extractors = {
pdf: extractTextFromPDF,
txt: extractTextFromTXT,
docx: extractTextFromDOCX
};

const filePromises = files.map((file) => new Promise((resolve, reject) => {
const fileExtension = file.name.split('.').pop().toLowerCase();
const extract = Object.prototype.hasOwnProperty.call(extractors, fileExtension)
? extractors[fileExtension]
: null;

// Reject before reading: there is no point loading a file we cannot parse.
if (!extract) {
reject(`Formato de archivo no soportado: ${file.name}`);
return;
}

const reader = new FileReader();
reader.onload = (event) => {
try {
extract(event.target.result, resolve);
} catch (error) {
// A synchronous failure in an extractor must settle the promise,
// otherwise Promise.all below would wait forever.
reject(error);
}
};
// Without these handlers a failed or aborted read would leave the promise pending.
reader.onerror = () => reject(reader.error);
reader.onabort = () => reject(reader.error);
reader.readAsArrayBuffer(file);
}));

// Wait for every file before updating the page.
Promise.all(filePromises)
.then(() => {
document.getElementById('extractedText').value = extractedText;
alert('Texto extraído de los archivos.');
})
.catch((error) => {
console.error('Error al procesar los archivos:', error);
alert('Hubo un error al procesar los archivos.');
});
}
/**
 * Extracts the text of a PDF with PDF.js and appends it to the global
 * `extractedText`.
 *
 * Each page contributes its text items joined by spaces plus a trailing
 * newline; one more newline closes the document.
 *
 * @param {ArrayBuffer} fileContent Raw bytes of the PDF file.
 * @param {Function} resolve Called with no arguments when extraction ends,
 *   whether it succeeded or failed (failures are only logged).
 */
function extractTextFromPDF(fileContent, resolve) {
pdfjsLib.getDocument({ data: fileContent }).promise
.then(function(pdf) {
const pagePromises = [];
for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
pagePromises.push(
pdf.getPage(pageNumber)
.then(function(page) {
return page.getTextContent();
})
.then(function(textContent) {
return textContent.items.map(item => item.str).join(' ');
})
);
}
// Promise.all keeps the results in page order even when pages finish
// out of order. Returning it routes page-level failures to the catch
// below instead of leaving `resolve` uncalled.
return Promise.all(pagePromises);
})
.then(function(pageTexts) {
const text = pageTexts.map(pageText => pageText + '\n').join('');
extractedText += text + '\n';
resolve();
})
.catch(function(error) {
console.error('Error al extraer texto del PDF:', error);
resolve();
});
}
/**
 * Appends the contents of a plain-text file, followed by a newline, to the
 * global `extractedText`.
 *
 * @param {ArrayBuffer|string} fileContent File contents. processFiles() reads
 *   files as an ArrayBuffer, which is decoded here as UTF-8; a string is used
 *   as is.
 * @param {Function} resolve Called with no arguments once the text is appended.
 */
function extractTextFromTXT(fileContent, resolve) {
// Concatenating an ArrayBuffer directly would append "[object ArrayBuffer]".
const text = typeof fileContent === 'string'
? fileContent
: new TextDecoder('utf-8').decode(fileContent);
extractedText += text + '\n';
resolve();
}
/**
 * Extracts the text of a DOCX file and appends it to the global
 * `extractedText`.
 *
 * The file is opened as a zip archive with JSZip, `word/document.xml` is
 * parsed, and the contents of its `w:t` elements are collected. Text that
 * belongs to the same `w:p` paragraph is concatenated without separators
 * (a word can be split across several `w:t` elements); paragraphs are
 * separated by newlines.
 *
 * @param {ArrayBuffer} fileContent Raw bytes of the DOCX file.
 * @param {Function} resolve Called with no arguments when extraction ends,
 *   whether it succeeded or failed (failures are only logged).
 */
function extractTextFromDOCX(fileContent, resolve) {
const zip = new JSZip();
zip.loadAsync(fileContent)
.then(function(archive) {
// file() returns null when the entry does not exist.
const documentEntry = archive.file('word/document.xml');
if (!documentEntry) {
throw new Error('word/document.xml no encontrado en el archivo DOCX');
}
return documentEntry.async('string');
})
.then(function(content) {
const parser = new DOMParser();
const xmlDoc = parser.parseFromString(content, 'text/xml');
const lines = [];
let currentParagraph = null;
Array.from(xmlDoc.getElementsByTagName('w:t')).forEach(function(textNode) {
// Find the paragraph that owns this piece of text.
let paragraph = textNode.parentNode;
while (paragraph && paragraph.nodeName !== 'w:p') {
paragraph = paragraph.parentNode;
}
if (lines.length === 0 || paragraph !== currentParagraph) {
lines.push('');
currentParagraph = paragraph;
}
lines[lines.length - 1] += textNode.textContent;
});
extractedText += lines.join('\n') + '\n';
resolve();
})
// A single catch covers an invalid archive, a missing document entry and
// parsing problems, so `resolve` is always called.
.catch(function(error) {
console.error('Error al extraer texto del DOCX:', error);
resolve();
});
}
/**
 * Asks the Hugging Face inference endpoint whether the model can be used.
 *
 * The model is treated as not ready only when the response carries an error
 * message containing "currently loading"; in that case the estimated wait is
 * shown to the user. Any other response counts as ready, so that a real
 * error is reported by the request that follows.
 *
 * Updates the global `modelReady` with the outcome.
 *
 * @returns {Promise<boolean>} true when the model can be queried, false when
 *   it is loading or the check itself failed.
 */
async function checkModelReady() {
// Use the same key as askQuestion(); when none is configured, send no
// Authorization header rather than a placeholder token.
const variables = (window.huggingface && window.huggingface.variables) || {};
const apiKey = variables['API_KEY_2'];
const headers = apiKey ? { 'Authorization': 'Bearer ' + apiKey } : {};
try {
const response = await fetch('https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased', {
method: 'GET',
headers: headers
});
const result = await response.json();
// `error` is only inspected when it is a string; calling includes() on
// another type would throw.
const errorMessage = result && typeof result.error === 'string' ? result.error : '';
if (errorMessage.includes('currently loading')) {
const estimatedTime = result.estimated_time || 20;
alert(`El modelo está cargando. Estimación de tiempo restante: ${Math.round(estimatedTime)} segundos.`);
modelReady = false;
return false;
}
modelReady = true;
return true;
} catch (error) {
console.error('Error al verificar si el modelo está listo:', error);
modelReady = false;
return false;
}
}
/**
 * Sends the question typed in #question, with the extracted text as context,
 * to the Hugging Face inference endpoint and writes the answer into #response.
 *
 * Nothing is sent when the question or the context is empty, when no API key
 * is configured, or when checkModelReady() reports that the model is not
 * ready.
 *
 * NOTE(review): the key is read from window.huggingface.variables, i.e. from
 * the page itself, so presumably anyone who can open the page can read it.
 * Confirm that this is acceptable for this key.
 * NOTE(review): the code expects an `answer` field in the response. Confirm
 * that this model is served with a question-answering pipeline.
 */
async function askQuestion() {
const question = document.getElementById('question').value;
const context = extractedText;
// Both the question and the context must be non-empty strings.
if (typeof question !== 'string' || typeof context !== 'string' || question.trim() === '' || context.trim() === '') {
alert('Por favor, ingresa una pregunta y asegúrate de que el contexto no esté vacío.');
return;
}

// Read the key defensively: the variables object may not exist on the page.
const variables = (window.huggingface && window.huggingface.variables) || {};
const apiKey = variables['API_KEY_2'];
if (!apiKey) {
console.error('Falta la variable API_KEY_2 en window.huggingface.variables.');
alert('No se encontró la clave de API (API_KEY_2).');
return;
}

// Stop here if the model is still loading or the check failed.
const ready = await checkModelReady();
if (!ready) {
return;
}

const data = {
inputs: {
question: question,
context: context
}
};

try {
const response = await fetch('https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased', {
method: 'POST',
headers: {
// The key is deliberately never logged.
'Authorization': 'Bearer ' + apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
});
const result = await response.json();
if (response.ok) {
const answer = result && typeof result.answer === 'string' ? result.answer : '';
if (answer === '') {
// Avoid printing "undefined" when the response has no answer field.
console.warn('Respuesta sin campo "answer":', result);
}
document.getElementById('response').innerText = answer !== '' ? answer : 'No se encontró una respuesta.';
} else {
console.error('Error en la respuesta:', result);
alert(`Hubo un error al procesar la solicitud: ${JSON.stringify(result)}`);
}
} catch (error) {
console.error('Error al hacer la consulta:', error);
alert('Hubo un error al procesar la solicitud.');
}
}
</script>
</body>
</html>