Spaces:

eduardmtz
/

www

Running

App Files Files Community

eduardmtz committed on Dec 28, 2024

Commit

e2b20b6

verified ·

1 Parent(s): 41242e5

Update test5.html

Browse files

Files changed (1) hide show

test5.html +88 -112

test5.html CHANGED Viewed

@@ -3,163 +3,135 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Extracción de Texto y Preguntas con Modelo Multilingüe</title>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
-    <style>
-        body { font-family: Arial, sans-serif; margin: 20px; }
-        #output { margin-top: 20px; }
-        textarea { width: 100%; height: 300px; }
-    </style>
 </head>
 <body>
-    <h1>Sube tu archivo (PDF, TXT, DOCX) y haz preguntas</h1>
-    <input type="file" id="fileInput" accept=".pdf,.txt,.docx" multiple>
-    <br><br>
-    <button onclick="processFiles()">Cargar y procesar archivos</button>
-    <br><br>
-    <textarea id="output" placeholder="Texto extraído del archivo..."></textarea>
     <br><br>
-    <label for="question">Pregunta:</label>
-    <input type="text" id="question" placeholder="Escribe tu pregunta aquí...">
     <br><br>
-    <button onclick="askQuestion()">Preguntar</button>
-    <div id="response"></div>
     <script>
-        // Función para limpiar el texto extraído
-        function cleanText(text) {
-            return text.replace(/\s+/g, ' ').trim(); // Elimina espacios y saltos de línea innecesarios
         }
-        // Función para procesar los archivos cargados
-        function processFiles() {
-            const fileInput = document.getElementById('fileInput');
-            const output = document.getElementById('output');
-            let extractedText = '';
-            Array.from(fileInput.files).forEach(file => {
                 const reader = new FileReader();
-                reader.onload = function(e) {
-                    const fileType = file.name.split('.').pop().toLowerCase();
-                    if (fileType === 'txt') {
-                        extractedText += cleanText(e.target.result) + '\n';
-                    } else if (fileType === 'pdf') {
-                        extractTextFromPDF(e.target.result).then(text => {
-                            extractedText += cleanText(text) + '\n';
-                            output.value = extractedText;
-                        });
-                    } else if (fileType === 'docx') {
-                        extractTextFromDOCX(e.target.result).then(text => {
-                            extractedText += cleanText(text) + '\n';
-                            output.value = extractedText;
-                        });
-                    }
                 };
-                if (file.type === 'application/pdf') {
-                    reader.readAsArrayBuffer(file);
-                } else {
-                    reader.readAsText(file);
-                }
             });
         }
-        // Función para extraer texto de un archivo PDF
-        function extractTextFromPDF(pdfData) {
             return new Promise((resolve, reject) => {
-                const loadingTask = pdfjsLib.getDocument({data: pdfData});
-                loadingTask.promise.then(pdf => {
-                    let text = '';
-                    let pageNumber = 1;
-                    function extractPageText(pageNum) {
-                        pdf.getPage(pageNum).then(page => {
-                            page.getTextContent().then(content => {
-                                content.items.forEach(item => {
-                                    text += item.str + ' ';
-                                });
-                                if (pageNum < pdf.numPages) {
-                                    extractPageText(pageNum + 1);
-                                } else {
-                                    resolve(text);
-                                }
-                            });
-                        });
-                    }
-                    extractPageText(pageNumber);
-                }, reject);
             });
         }
-        // Función para extraer texto de un archivo DOCX
-        function extractTextFromDOCX(docxData) {
-            return new Promise((resolve, reject) => {
-                JSZip.loadAsync(docxData).then(function(zip) {
-                    const xmlFile = zip.file("word/document.xml");
-                    if (!xmlFile) {
-                        reject("Archivo XML no encontrado.");
-                    } else {
-                        xmlFile.async("string").then(function(xmlText) {
-                            const parser = new DOMParser();
-                            const xmlDoc = parser.parseFromString(xmlText, "text/xml");
-                            let text = '';
-                            const paragraphs = xmlDoc.getElementsByTagName('w:t');
-                            for (let i = 0; i < paragraphs.length; i++) {
-                                text += paragraphs[i].textContent + ' ';
-                            }
-                            resolve(text);
-                        });
-                    }
-                });
-            });
-        }
-        // Función para hacer preguntas al modelo de Hugging Face
         async function askQuestion() {
-            const question = document.getElementById('question').value;
-            const context = document.getElementById('output').value;
             if (!question || !context) {
                 alert("Por favor, asegúrate de que hay texto y una pregunta.");
                 return;
             }
             const data = {
-                inputs: {
-                    question: question,
-                    context: context
-                }
             };
-            const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
-            const cucu = window.huggingface.variables["API_KEY_2"];
-            const token = window.huggingface.variables["API_KEY_2"];
-            console.log("key : " + cucu);
             const headers = {
                 "Authorization": `Bearer ${token}`,
                 "Content-Type": "application/json"
             };
             try {
                 const response = await fetch(modelUrl, {
                     method: 'POST',
                     headers: headers,
-                    body: JSON.stringify(data)
                 });
                 const result = await response.json();
                 if (response.ok) {
                     document.getElementById('response').innerText = result.answer;
                 } else {
                     document.getElementById('response').innerText = `Error: ${result.error}`;
                 }
             } catch (error) {
@@ -168,6 +140,10 @@
             }
         }
     </script>
 </body>
 </html>

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Consulta con DistilBERT Multilingual</title>
 </head>
 <body>
+    <h1>Consulta con DistilBERT Multilingual</h1>
+    <!-- Cargar archivo y mostrar texto -->
+    <input type="file" id="fileInput" accept=".txt,.pdf,.doc,.docx" multiple />
     <br><br>
+    <textarea id="textOutput" rows="10" cols="100" placeholder="El texto extraído aparecerá aquí..." readonly></textarea>
     <br><br>
+    <!-- Caja de preguntas -->
+    <input type="text" id="questionInput" placeholder="Escribe tu pregunta aquí" />
+    <button onclick="askQuestion()">Hacer Pregunta</button>
+    <h3>Respuesta:</h3>
+    <p id="response"></p>
     <script>
+        // Función para extraer texto de archivos PDF, TXT y DOCX
+        async function handleFileUpload(event) {
+            const files = event.target.files;
+            let allText = "";
+            for (let file of files) {
+                const fileType = file.type;
+                if (fileType === "application/pdf") {
+                    allText += await extractTextFromPDF(file);
+                } else if (fileType === "text/plain") {
+                    allText += await extractTextFromTXT(file);
+                } else if (fileType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
+                    allText += await extractTextFromDOCX(file);
+                } else {
+                    alert("Archivo no compatible. Solo se aceptan PDF, TXT y DOCX.");
+                }
+            }
+            // Mostrar el texto extraído en el textarea
+            document.getElementById("textOutput").value = allText;
         }
+        // Extraer texto de PDF
+        async function extractTextFromPDF(file) {
+            const pdf = await pdfjsLib.getDocument(URL.createObjectURL(file)).promise;
+            let text = "";
+            for (let i = 0; i < pdf.numPages; i++) {
+                const page = await pdf.getPage(i + 1);
+                const content = await page.getTextContent();
+                text += content.items.map(item => item.str).join(" ") + "\n";
+            }
+            return text;
+        }
+        // Extraer texto de TXT
+        async function extractTextFromTXT(file) {
+            return new Promise((resolve, reject) => {
                 const reader = new FileReader();
+                reader.onload = function(event) {
+                    resolve(event.target.result);
                 };
+                reader.onerror = function(error) {
+                    reject(error);
+                };
+                reader.readAsText(file);
             });
         }
+        // Extraer texto de DOCX
+        async function extractTextFromDOCX(file) {
             return new Promise((resolve, reject) => {
+                const reader = new FileReader();
+                reader.onload = function(event) {
+                    const doc = new window.Docxtemplater(new window.PizZip(event.target.result));
+                    const text = doc.getFullText(); // Extrae todo el texto
+                    resolve(text);
+                };
+                reader.onerror = function(error) {
+                    reject(error);
+                };
+                reader.readAsBinaryString(file);
             });
         }
+        // Run handleFileUpload whenever the selection in the #fileInput element changes.
+        document.getElementById("fileInput").addEventListener("change", handleFileUpload);
+        /**
+         * Send the typed question, with the extracted text as context, to the
+         * Hugging Face inference endpoint and show the result in #response.
+         *
+         * Reads #questionInput and #textOutput; alerts and returns early when
+         * either is empty.
+         *
+         * NOTE(review): the bearer token is read and sent from browser-side
+         * script, so it is visible to anyone who loads the page — confirm this
+         * is intended.
+         */
         async function askQuestion() {
+            const question = document.getElementById("questionInput").value;
+            const context = document.getElementById("textOutput").value;
             if (!question || !context) {
                 alert("Por favor, asegúrate de que hay texto y una pregunta.");
                 return;
             }
+            // Inference endpoint and API token (token read from window.huggingface.variables).
+            const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
+            const token = window.huggingface.variables["API_KEY_2"];
+            /* Request payload.
+             * NOTE(review): sent as a flat {question, context} object; confirm the
+             * endpoint does not expect it wrapped as {inputs: {question, context}}. */
             const data = {
+                question: question, // Question as a plain string
+                context: context    // Context as a plain string
             };
             const headers = {
                 "Authorization": `Bearer ${token}`,
                 "Content-Type": "application/json"
             };
             try {
+                // POST the payload to the Hugging Face inference endpoint.
                 const response = await fetch(modelUrl, {
                     method: 'POST',
                     headers: headers,
+                    body: JSON.stringify(data) // Serialize the payload as JSON
                 });
                 const result = await response.json();
                 if (response.ok) {
+                    // Success: display the `answer` field of the JSON response.
                     document.getElementById('response').innerText = result.answer;
                 } else {
+                    // Non-2xx status: display the `error` field of the JSON response.
                     document.getElementById('response').innerText = `Error: ${result.error}`;
                 }
             } catch (error) {
             }
         }
     </script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.20.0/docxtemplater.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/pizzip/3.0.6/pizzip.min.js"></script>
 </body>
 </html>