Spaces:

eduardmtz
/

www

Running

App Files Files Community

eduardmtz committed on Dec 28, 2024

Commit

41242e5

verified ·

1 Parent(s): 91ff9c1

Update test5.html

Browse files

Files changed (1) hide show

test5.html +110 -166

test5.html CHANGED Viewed

@@ -3,211 +3,156 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Preguntas y Respuestas con DistilBERT Multilingual</title>
     <style>
-        textarea {
-            width: 100%;
-            height: 300px;
-            margin-top: 10px;
-        }
     </style>
 </head>
 <body>
-    <h1>Preguntas sobre un Documento</h1>
-    <!-- Formulario para cargar archivos (permitir carga múltiple) -->
     <input type="file" id="fileInput" accept=".pdf,.txt,.docx" multiple>
-    <button onclick="processFiles()">Cargar y Analizar Archivos</button>
-    <!-- Área de texto para la pregunta -->
     <br><br>
     <label for="question">Pregunta:</label>
-    <input type="text" id="question" placeholder="Escribe tu pregunta aquí" />
-    <!-- Botón para enviar la pregunta -->
-    <button onclick="askQuestion()">Enviar Pregunta</button>
-    <!-- Área para mostrar el texto extraído de los archivos -->
-    <h3>Texto Extraído:</h3>
-    <textarea id="extractedText" readonly></textarea>
-    <!-- Área para mostrar la respuesta -->
-    <h3>Respuesta:</h3>
     <div id="response"></div>
-    <!-- Cargar la librería PDF.js -->
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/jszip.min.js"></script>
     <script>
-        // Configuración de PDF.js (Especificar la ubicación del archivo workerSrc)
-        pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.worker.min.js';
-        // Global variable to hold extracted text
-        let extractedText = '';
-        let modelReady = false;
-        // Function to process the uploaded files (PDF, TXT, DOCX)
         function processFiles() {
             const fileInput = document.getElementById('fileInput');
-            const files = fileInput.files;
-            if (files.length === 0) {
-                alert('Por favor, selecciona al menos un archivo.');
-                return;
-            }
-            extractedText = ''; // Reset the extracted text
-            let filePromises = [];
-            // Process each file based on its type
-            for (let file of files) {
-                const fileExtension = file.name.split('.').pop().toLowerCase();
                 const reader = new FileReader();
-                const promise = new Promise((resolve, reject) => {
-                    reader.onload = function(event) {
-                        const fileContent = event.target.result;
-                        // Extract text based on the file type
-                        if (fileExtension === 'pdf') {
-                            extractTextFromPDF(fileContent, resolve);
-                        } else if (fileExtension === 'txt') {
-                            extractTextFromTXT(fileContent, resolve);
-                        } else if (fileExtension === 'docx') {
-                            extractTextFromDOCX(fileContent, resolve);
-                        } else {
-                            reject(`Formato de archivo no soportado: ${file.name}`);
-                        }
-                    };
                     reader.readAsArrayBuffer(file);
-                });
-                filePromises.push(promise);
-            }
-            // Wait for all file promises to finish
-            Promise.all(filePromises)
-                .then(() => {
-                    // Display extracted text in the textarea
-                    document.getElementById('extractedText').value = extractedText;
-                    alert('Texto extraído de los archivos.');
-                })
-                .catch((error) => {
-                    console.error('Error al procesar los archivos:', error);
-                    alert('Hubo un error al procesar los archivos.');
-                });
-        }
-        // Function to extract text from a PDF file
-        function extractTextFromPDF(fileContent, resolve) {
-            const loadingTask = pdfjsLib.getDocument({ data: fileContent });
-            loadingTask.promise.then(function(pdf) {
-                let text = '';
-                const numPages = pdf.numPages;
-                let pagePromises = [];
-                for (let i = 1; i <= numPages; i++) {
-                    pagePromises.push(pdf.getPage(i).then(function(page) {
-                        return page.getTextContent().then(function(textContent) {
-                            // Concatenate text extracted from each page
-                            text += textContent.items.map(item => item.str).join(' ') + '\n';
-                        });
-                    }));
                 }
-                Promise.all(pagePromises).then(function() {
-                    extractedText += text + '\n'; // Add the extracted text
-                    resolve();
-                });
-            }).catch(function(error) {
-                console.error('Error al extraer texto del PDF:', error);
-                resolve();
             });
         }
-        // Function to extract text from a TXT file
-        function extractTextFromTXT(fileContent, resolve) {
-            extractedText += fileContent + '\n'; // Add the extracted text
-            resolve();
-        }
-        // Function to extract text from a DOCX file
-        function extractTextFromDOCX(fileContent, resolve) {
-            const zip = new JSZip();
-            zip.loadAsync(fileContent).then(function(zip) {
-                zip.file('word/document.xml').async('string').then(function(content) {
-                    const parser = new DOMParser();
-                    const xmlDoc = parser.parseFromString(content, 'text/xml');
-                    const texts = xmlDoc.getElementsByTagName('w:t');
-                    extractedText += Array.from(texts).map(t => t.textContent).join(' ') + '\n';
-                    resolve();
-                }).catch(function(error) {
-                    console.error('Error al extraer texto del DOCX:', error);
-                    resolve();
-                });
             });
         }
-        // Function to check if the model is ready
-        async function checkModelReady() {
-            try {
-                const response = await fetch('https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased', {
-                    method: 'GET',
-                    headers: {
-                        'Authorization': 'Bearer your_huggingface_api_key' // Replace with your Hugging Face API key
                     }
                 });
-                const result = await response.json();
-                if (result.error && result.error.includes('currently loading')) {
-                    const estimatedTime = result.estimated_time || 20;
-                    alert(`El modelo está cargando. Estimación de tiempo restante: ${Math.round(estimatedTime)} segundos.`);
-                    return false;
-                }
-                modelReady = true;
-                return true;
-            } catch (error) {
-                console.error('Error al verificar si el modelo está listo:', error);
-                return false;
-            }
         }
-        // Function to validate inputs and send the question to Hugging Face API
         async function askQuestion() {
             const question = document.getElementById('question').value;
-            const context = extractedText;
-            // Validate if question and context are non-empty strings
-            if (typeof question !== 'string' || typeof context !== 'string' || question.trim() === '' || context.trim() === '') {
-                alert('Por favor, ingresa una pregunta y asegúrate de que el contexto no esté vacío.');
                 return;
             }
-            // Check if the model is ready
-            const modelReady = await checkModelReady();
-            if (!modelReady) {
-                return; // Don't continue if the model isn't ready
-            }
-            // Prepare the request data
             const data = {
                 inputs: {
-                    question: question,  // Should be a string
-                    context: context     // Should be a string
                 }
             };
-            const cucu = window.huggingface.variables["API_KEY_2"];
             console.log("key : " + cucu);
             try {
-                const response = await fetch('https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased', {
                     method: 'POST',
-                    headers: {
-                        'Authorization': 'Bearer ' + cucu, // Replace with your Hugging Face API key
-                        'Content-Type': 'application/json'
-                    },
                     body: JSON.stringify(data)
                 });
@@ -215,12 +160,11 @@
                 if (response.ok) {
                     document.getElementById('response').innerText = result.answer;
                 } else {
-                    console.error('Error en la respuesta:', result);
-                    alert(`Hubo un error al procesar la solicitud: ${JSON.stringify(result)}`);
                 }
             } catch (error) {
                 console.error('Error al hacer la consulta:', error);
-                alert('Hubo un error al procesar la solicitud.');
             }
         }
     </script>

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Extracción de Texto y Preguntas con Modelo Multilingüe</title>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
     <style>
+        body { font-family: Arial, sans-serif; margin: 20px; }
+        #output { margin-top: 20px; }
+        textarea { width: 100%; height: 300px; }
     </style>
 </head>
 <body>
+    <h1>Sube tu archivo (PDF, TXT, DOCX) y haz preguntas</h1>
     <input type="file" id="fileInput" accept=".pdf,.txt,.docx" multiple>
     <br><br>
+    <button onclick="processFiles()">Cargar y procesar archivos</button>
+    <br><br>
+    <textarea id="output" placeholder="Texto extraído del archivo..."></textarea>
+    <br><br>
     <label for="question">Pregunta:</label>
+    <input type="text" id="question" placeholder="Escribe tu pregunta aquí...">
+    <br><br>
+    <button onclick="askQuestion()">Preguntar</button>
     <div id="response"></div>
     <script>
+        // Función para limpiar el texto extraído
+        function cleanText(text) {
+            return text.replace(/\s+/g, ' ').trim(); // Elimina espacios y saltos de línea innecesarios
+        }
+        // Función para procesar los archivos cargados
         function processFiles() {
             const fileInput = document.getElementById('fileInput');
+            const output = document.getElementById('output');
+            let extractedText = '';
+            Array.from(fileInput.files).forEach(file => {
                 const reader = new FileReader();
+                reader.onload = function(e) {
+                    const fileType = file.name.split('.').pop().toLowerCase();
+                    if (fileType === 'txt') {
+                        extractedText += cleanText(e.target.result) + '\n';
+                    } else if (fileType === 'pdf') {
+                        extractTextFromPDF(e.target.result).then(text => {
+                            extractedText += cleanText(text) + '\n';
+                            output.value = extractedText;
+                        });
+                    } else if (fileType === 'docx') {
+                        extractTextFromDOCX(e.target.result).then(text => {
+                            extractedText += cleanText(text) + '\n';
+                            output.value = extractedText;
+                        });
+                    }
+                };
+                if (file.type === 'application/pdf') {
                     reader.readAsArrayBuffer(file);
+                } else {
+                    reader.readAsText(file);
                 }
             });
         }
+        // Función para extraer texto de un archivo PDF
+        function extractTextFromPDF(pdfData) {
+            return new Promise((resolve, reject) => {
+                const loadingTask = pdfjsLib.getDocument({data: pdfData});
+                loadingTask.promise.then(pdf => {
+                    let text = '';
+                    let pageNumber = 1;
+                    function extractPageText(pageNum) {
+                        pdf.getPage(pageNum).then(page => {
+                            page.getTextContent().then(content => {
+                                content.items.forEach(item => {
+                                    text += item.str + ' ';
+                                });
+                                if (pageNum < pdf.numPages) {
+                                    extractPageText(pageNum + 1);
+                                } else {
+                                    resolve(text);
+                                }
+                            });
+                        });
+                    }
+                    extractPageText(pageNumber);
+                }, reject);
             });
         }
+        // Función para extraer texto de un archivo DOCX
+        function extractTextFromDOCX(docxData) {
+            return new Promise((resolve, reject) => {
+                JSZip.loadAsync(docxData).then(function(zip) {
+                    const xmlFile = zip.file("word/document.xml");
+                    if (!xmlFile) {
+                        reject("Archivo XML no encontrado.");
+                    } else {
+                        xmlFile.async("string").then(function(xmlText) {
+                            const parser = new DOMParser();
+                            const xmlDoc = parser.parseFromString(xmlText, "text/xml");
+                            let text = '';
+                            const paragraphs = xmlDoc.getElementsByTagName('w:t');
+                            for (let i = 0; i < paragraphs.length; i++) {
+                                text += paragraphs[i].textContent + ' ';
+                            }
+                            resolve(text);
+                        });
                     }
                 });
+            });
         }
+        // Función para hacer preguntas al modelo de Hugging Face
         async function askQuestion() {
             const question = document.getElementById('question').value;
+            const context = document.getElementById('output').value;
+            if (!question || !context) {
+                alert("Por favor, asegúrate de que hay texto y una pregunta.");
                 return;
             }
             const data = {
                 inputs: {
+                    question: question,
+                    context: context
                 }
             };
+            const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
+            const cucu = window.huggingface.variables["API_KEY_2"];
+            const token = window.huggingface.variables["API_KEY_2"];
             console.log("key : " + cucu);
+            const headers = {
+                "Authorization": `Bearer ${token}`,
+                "Content-Type": "application/json"
+            };
             try {
+                const response = await fetch(modelUrl, {
                     method: 'POST',
+                    headers: headers,
                     body: JSON.stringify(data)
                 });
                 if (response.ok) {
                     document.getElementById('response').innerText = result.answer;
                 } else {
+                    document.getElementById('response').innerText = `Error: ${result.error}`;
                 }
             } catch (error) {
                 console.error('Error al hacer la consulta:', error);
+                document.getElementById('response').innerText = `Error al procesar la solicitud: ${error.message}`;
             }
         }
     </script>