Spaces:

eduardmtz
/

www

Running

App Files Files Community

eduardmtz committed on Dec 27, 2024

Commit

6adb396

verified ·

1 Parent(s): c9e56cc

Update test5.html

Browse files

Files changed (1) hide show

test5.html +144 -95

test5.html CHANGED Viewed

@@ -3,139 +3,187 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Preguntas sobre Documentos</title>
-    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.2/mammoth.browser.min.js"></script>
 </head>
 <body>
-    <h1>Sube un archivo y haz preguntas en español o catalán</h1>
-    <input type="file" id="file-input" />
-    <br><br>
     <label for="question">Pregunta:</label>
-    <input type="text" id="question" placeholder="Escribe tu pregunta aquí">
-    <button onclick="askQuestion()">Hacer Pregunta</button>
     <h3>Respuesta:</h3>
     <div id="response"></div>
     <script>
-        // Especificar la ruta del worker de PDF.js
-        pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.worker.min.js';
-        let documentText = ''; // Para almacenar el texto extraído de los documentos
-        // Función para leer archivos PDF
-        async function readPDF(file) {
-            const reader = new FileReader();
-            reader.onload = async function(event) {
-                const loadingTask = pdfjsLib.getDocument(event.target.result);
-                loadingTask.promise.then(function(pdf) {
-                    let text = '';
-                    const numPages = pdf.numPages;
-                    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
-                        pdf.getPage(pageNum).then(function(page) {
-                            page.getTextContent().then(function(textContent) {
-                                textContent.items.forEach(function(item) {
-                                    text += item.str + ' ';
-                                });
-                                documentText = text; // Guardamos el texto
-                            });
-                        });
-                    }
                 });
-            };
-            reader.readAsArrayBuffer(file);
         }
-        // Función para leer archivos de texto (TXT)
-        function readTXT(file) {
-            const reader = new FileReader();
-            reader.onload = function(event) {
-                documentText = event.target.result; // Guardamos el texto
-            };
-            reader.readAsText(file);
         }
-        // Función para leer archivos de Word (DOCX)
-        function readWord(file) {
-            const reader = new FileReader();
-            reader.onload = function(event) {
-                mammoth.convertToHtml({ arrayBuffer: event.target.result })
-                    .then(function(result) {
-                        documentText = result.value; // Guardamos el texto
-                    })
-                    .catch(function(err) {
-                        console.log(err);
-                    });
-            };
-            reader.readAsArrayBuffer(file);
         }
-        // Lógica para cargar el archivo seleccionado
-        document.getElementById('file-input').addEventListener('change', function(event) {
-            const file = event.target.files[0];
-            const fileType = file.type;
-            if (fileType === 'application/pdf') {
-                readPDF(file);
-            } else if (fileType === 'text/plain') {
-                readTXT(file);
-            } else if (fileType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
-                readWord(file);
-            } else {
-                alert('Tipo de archivo no soportado');
-            }
-        });
-        // Función para hacer preguntas al modelo de Hugging Face
         async function askQuestion() {
             const question = document.getElementById('question').value;
-            if (!documentText || !question) {
-                alert('Asegúrate de que se haya cargado un archivo y que hayas ingresado una pregunta.');
                 return;
             }
-            // Usamos la API de Hugging Face para el modelo multilingüe de preguntas y respuestas
-            //const modelUrl = 'https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased-distilled-squad';
-            const modelUrl = 'https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased'; // Usa el modelo DistilBERT multilingüe
-            // Preparamos los datos para la consulta
             const data = {
                 inputs: {
-                    question: question,
-                    context: documentText
                 }
             };
-            console.log("Enviando datos: ", data); // Verificación de los datos enviados
-            const cucu = window.huggingface.variables["API_KEY_2"];
-            console.log("key : " + cucu);
             try {
-                // Hacemos la petición a la API de Hugging Face
-                const response = await fetch(modelUrl, {
                     method: 'POST',
                     headers: {
-                        'Authorization': 'Bearer ' + cucu,  // Sustituye por tu API key de Hugging Face
                         'Content-Type': 'application/json'
                     },
                     body: JSON.stringify(data)
                 });
-                if (!response.ok) {
-                    throw new Error(`Error: ${response.statusText} - ${await response.text()}`);
-                }
                 const result = await response.json();
-                const answer = result?.answer || 'No pude encontrar una respuesta.';
-                document.getElementById('response').innerHTML = answer;
             } catch (error) {
                 console.error('Error al hacer la consulta:', error);
-                alert('Hubo un error al procesar la solicitud: ' + error.message);
             }
         }
     </script>
@@ -143,3 +191,4 @@
 </html>

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Preguntas y Respuestas con DistilBERT Multilingual</title>
+    <style>
+        textarea {
+            width: 100%;
+            height: 300px;
+            margin-top: 10px;
+        }
+    </style>
 </head>
 <body>
+    <h1>Preguntas sobre un Documento</h1>
+    <!-- Formulario para cargar archivos (permitir carga múltiple) -->
+    <input type="file" id="fileInput" accept=".pdf,.txt,.docx" multiple>
+    <button onclick="processFiles()">Cargar y Analizar Archivos</button>
+    <!-- Área de texto para la pregunta -->
+    <br><br>
     <label for="question">Pregunta:</label>
+    <input type="text" id="question" placeholder="Escribe tu pregunta aquí" />
+    <!-- Botón para enviar la pregunta -->
+    <button onclick="askQuestion()">Enviar Pregunta</button>
+    <!-- Área para mostrar el texto extraído de los archivos -->
+    <h3>Texto Extraído:</h3>
+    <textarea id="extractedText" readonly></textarea>
+    <!-- Área para mostrar la respuesta -->
     <h3>Respuesta:</h3>
     <div id="response"></div>
+    <script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@2.6.347/es5/build/pdf.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
     <script>
+        // Text accumulated from every uploaded file; cleared by processFiles() and used as the question context by askQuestion()
+        let extractedText = '';
+        // Function to process the uploaded files (PDF, TXT, DOCX)
+        function processFiles() {
+            const fileInput = document.getElementById('fileInput');
+            const files = fileInput.files;
+            if (files.length === 0) {
+                alert('Por favor, selecciona al menos un archivo.');
+                return;
+            }
+            extractedText = ''; // Reset the extracted text
+            let filePromises = [];
+            // Process each file based on its type
+            for (let file of files) {
+                const fileExtension = file.name.split('.').pop().toLowerCase();
+                const reader = new FileReader();
+                const promise = new Promise((resolve, reject) => {
+                    reader.onload = function(event) {
+                        const fileContent = event.target.result;
+                        // Extract text based on the file type
+                        if (fileExtension === 'pdf') {
+                            extractTextFromPDF(fileContent, resolve);
+                        } else if (fileExtension === 'txt') {
+                            extractTextFromTXT(fileContent, resolve);
+                        } else if (fileExtension === 'docx') {
+                            extractTextFromDOCX(fileContent, resolve);
+                        } else {
+                            reject(`Formato de archivo no soportado: ${file.name}`);
+                        }
+                    };
+                    reader.readAsArrayBuffer(file);
+                });
+                filePromises.push(promise);
+            }
+            // Wait for all file promises to finish
+            Promise.all(filePromises)
+                .then(() => {
+                    // Display extracted text in the textarea
+                    document.getElementById('extractedText').value = extractedText;
+                    alert('Texto extraído de los archivos.');
+                })
+                .catch((error) => {
+                    console.error('Error al procesar los archivos:', error);
+                    alert('Hubo un error al procesar los archivos.');
                 });
         }
+        // Function to extract text from a PDF file
+        function extractTextFromPDF(fileContent, resolve) {
+            const loadingTask = pdfjsLib.getDocument({ data: fileContent });
+            loadingTask.promise.then(function(pdf) {
+                let text = '';
+                const numPages = pdf.numPages;
+                let pagePromises = [];
+                for (let i = 1; i <= numPages; i++) {
+                    pagePromises.push(pdf.getPage(i).then(function(page) {
+                        return page.getTextContent().then(function(textContent) {
+                            text += textContent.items.map(item => item.str).join(' ') + '\n';
+                        });
+                    }));
+                }
+                Promise.all(pagePromises).then(function() {
+                    extractedText += text + '\n'; // Add the extracted text
+                    resolve();
+                });
+            }).catch(function(error) {
+                console.error('Error al extraer texto del PDF:', error);
+                resolve();
+            });
         }
+        // Function to extract text from a TXT file
+        function extractTextFromTXT(fileContent, resolve) {
+            extractedText += fileContent + '\n'; // Add the extracted text
+            resolve();
         }
+        // Function to extract text from a DOCX file
+        function extractTextFromDOCX(fileContent, resolve) {
+            const zip = new JSZip();
+            zip.loadAsync(fileContent).then(function(zip) {
+                zip.file('word/document.xml').async('string').then(function(content) {
+                    const parser = new DOMParser();
+                    const xmlDoc = parser.parseFromString(content, 'text/xml');
+                    const texts = xmlDoc.getElementsByTagName('w:t');
+                    extractedText += Array.from(texts).map(t => t.textContent).join(' ') + '\n';
+                    resolve();
+                }).catch(function(error) {
+                    console.error('Error al extraer texto del DOCX:', error);
+                    resolve();
+                });
+            });
+        }
+        // Function to validate inputs and send the question to Hugging Face API
         async function askQuestion() {
             const question = document.getElementById('question').value;
+            const context = extractedText;
+            // Validate if question and context are non-empty strings
+            if (typeof question !== 'string' || typeof context !== 'string' || question.trim() === '' || context.trim() === '') {
+                alert('Por favor, ingresa una pregunta y asegúrate de que el contexto no esté vacío.');
                 return;
             }
+            const cucu = window.huggingface.variables["API_KEY_2"];
+            console.log("key : " + cucu);
+            // Prepare the request data
             const data = {
                 inputs: {
+                    question: question,  // Should be a string
+                    context: context     // Should be a string
                 }
             };
             try {
+                const response = await fetch('https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased', {
                     method: 'POST',
                     headers: {
+                        'Authorization': 'Bearer ' + cucu', // Replace with your Hugging Face API key
                         'Content-Type': 'application/json'
                     },
                     body: JSON.stringify(data)
                 });
                 const result = await response.json();
+                if (response.ok) {
+                    document.getElementById('response').innerText = result.answer;
+                } else {
+                    console.error('Error en la respuesta:', result);
+                    alert(`Hubo un error al procesar la solicitud: ${JSON.stringify(result)}`);
+                }
             } catch (error) {
                 console.error('Error al hacer la consulta:', error);
+                alert('Hubo un error al procesar la solicitud.');
             }
         }
     </script>
 </html>