Spaces:
Running
Running
Update test5.html
Browse files- test5.html +88 -112
test5.html
CHANGED
|
@@ -3,163 +3,135 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>
|
| 7 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
| 8 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
|
| 9 |
-
<style>
|
| 10 |
-
body { font-family: Arial, sans-serif; margin: 20px; }
|
| 11 |
-
#output { margin-top: 20px; }
|
| 12 |
-
textarea { width: 100%; height: 300px; }
|
| 13 |
-
</style>
|
| 14 |
</head>
|
| 15 |
<body>
|
| 16 |
-
|
| 17 |
-
<
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
<
|
| 21 |
-
|
| 22 |
-
<textarea id="output" placeholder="Texto extraído del archivo..."></textarea>
|
| 23 |
<br><br>
|
| 24 |
-
|
| 25 |
-
<
|
| 26 |
-
<input type="text" id="question" placeholder="Escribe tu pregunta aquí...">
|
| 27 |
<br><br>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
<
|
| 30 |
-
|
| 31 |
-
<div id="response"></div>
|
| 32 |
|
| 33 |
<script>
|
| 34 |
-
// Funci贸n para
|
| 35 |
-
function
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
|
| 39 |
-
//
|
| 40 |
-
function
|
| 41 |
-
const
|
| 42 |
-
|
| 43 |
-
let
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
const reader = new FileReader();
|
| 47 |
-
reader.onload = function(
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
if (fileType === 'txt') {
|
| 51 |
-
extractedText += cleanText(e.target.result) + '\n';
|
| 52 |
-
} else if (fileType === 'pdf') {
|
| 53 |
-
extractTextFromPDF(e.target.result).then(text => {
|
| 54 |
-
extractedText += cleanText(text) + '\n';
|
| 55 |
-
output.value = extractedText;
|
| 56 |
-
});
|
| 57 |
-
} else if (fileType === 'docx') {
|
| 58 |
-
extractTextFromDOCX(e.target.result).then(text => {
|
| 59 |
-
extractedText += cleanText(text) + '\n';
|
| 60 |
-
output.value = extractedText;
|
| 61 |
-
});
|
| 62 |
-
}
|
| 63 |
};
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
reader.readAsText(file);
|
| 69 |
-
}
|
| 70 |
});
|
| 71 |
}
|
| 72 |
|
| 73 |
-
//
|
| 74 |
-
function
|
| 75 |
return new Promise((resolve, reject) => {
|
| 76 |
-
const
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
});
|
| 87 |
-
|
| 88 |
-
if (pageNum < pdf.numPages) {
|
| 89 |
-
extractPageText(pageNum + 1);
|
| 90 |
-
} else {
|
| 91 |
-
resolve(text);
|
| 92 |
-
}
|
| 93 |
-
});
|
| 94 |
-
});
|
| 95 |
-
}
|
| 96 |
-
|
| 97 |
-
extractPageText(pageNumber);
|
| 98 |
-
}, reject);
|
| 99 |
});
|
| 100 |
}
|
| 101 |
|
| 102 |
-
//
|
| 103 |
-
|
| 104 |
-
return new Promise((resolve, reject) => {
|
| 105 |
-
JSZip.loadAsync(docxData).then(function(zip) {
|
| 106 |
-
const xmlFile = zip.file("word/document.xml");
|
| 107 |
-
if (!xmlFile) {
|
| 108 |
-
reject("Archivo XML no encontrado.");
|
| 109 |
-
} else {
|
| 110 |
-
xmlFile.async("string").then(function(xmlText) {
|
| 111 |
-
const parser = new DOMParser();
|
| 112 |
-
const xmlDoc = parser.parseFromString(xmlText, "text/xml");
|
| 113 |
-
let text = '';
|
| 114 |
-
const paragraphs = xmlDoc.getElementsByTagName('w:t');
|
| 115 |
-
|
| 116 |
-
for (let i = 0; i < paragraphs.length; i++) {
|
| 117 |
-
text += paragraphs[i].textContent + ' ';
|
| 118 |
-
}
|
| 119 |
-
resolve(text);
|
| 120 |
-
});
|
| 121 |
-
}
|
| 122 |
-
});
|
| 123 |
-
});
|
| 124 |
-
}
|
| 125 |
|
| 126 |
-
// Función para hacer
|
| 127 |
async function askQuestion() {
|
| 128 |
-
const question = document.getElementById(
|
| 129 |
-
const context = document.getElementById(
|
| 130 |
|
| 131 |
if (!question || !context) {
|
| 132 |
alert("Por favor, aseg煤rate de que hay texto y una pregunta.");
|
| 133 |
return;
|
| 134 |
}
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
const data = {
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
context: context
|
| 140 |
-
}
|
| 141 |
};
|
| 142 |
|
| 143 |
-
const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
|
| 144 |
-
const cucu = window.huggingface.variables["API_KEY_2"];
|
| 145 |
-
const token = window.huggingface.variables["API_KEY_2"];
|
| 146 |
-
console.log("key : " + cucu);
|
| 147 |
const headers = {
|
| 148 |
"Authorization": `Bearer ${token}`,
|
| 149 |
"Content-Type": "application/json"
|
| 150 |
};
|
| 151 |
|
| 152 |
try {
|
|
|
|
| 153 |
const response = await fetch(modelUrl, {
|
| 154 |
method: 'POST',
|
| 155 |
headers: headers,
|
| 156 |
-
body: JSON.stringify(data)
|
| 157 |
});
|
| 158 |
|
| 159 |
const result = await response.json();
|
|
|
|
| 160 |
if (response.ok) {
|
|
|
|
| 161 |
document.getElementById('response').innerText = result.answer;
|
| 162 |
} else {
|
|
|
|
| 163 |
document.getElementById('response').innerText = `Error: ${result.error}`;
|
| 164 |
}
|
| 165 |
} catch (error) {
|
|
@@ -168,6 +140,10 @@
|
|
| 168 |
}
|
| 169 |
}
|
| 170 |
</script>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
</body>
|
| 172 |
</html>
|
| 173 |
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Consulta con DistilBERT Multilingual</title>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
</head>
|
| 8 |
<body>
|
| 9 |
+
|
| 10 |
+
<h1>Consulta con DistilBERT Multilingual</h1>
|
| 11 |
+
|
| 12 |
+
<!-- Cargar archivo y mostrar texto -->
|
| 13 |
+
<input type="file" id="fileInput" accept=".txt,.pdf,.doc,.docx" multiple />
|
|
|
|
|
|
|
| 14 |
<br><br>
|
| 15 |
+
|
| 16 |
+
<textarea id="textOutput" rows="10" cols="100" placeholder="El texto extraído aparecerá aquí..." readonly></textarea>
|
|
|
|
| 17 |
<br><br>
|
| 18 |
+
|
| 19 |
+
<!-- Caja de preguntas -->
|
| 20 |
+
<input type="text" id="questionInput" placeholder="Escribe tu pregunta aquí" />
|
| 21 |
+
<button onclick="askQuestion()">Hacer Pregunta</button>
|
| 22 |
|
| 23 |
+
<h3>Respuesta:</h3>
|
| 24 |
+
<p id="response"></p>
|
|
|
|
| 25 |
|
| 26 |
<script>
|
| 27 |
+
// Función para extraer texto de archivos PDF, TXT y DOCX
|
| 28 |
+
/**
 * Handles the file input's "change" event: extracts the text of every
 * selected file and writes the concatenated result into the #textOutput
 * textarea.
 *
 * Files are dispatched on their MIME type. When the browser reports an empty
 * type, the file-name extension is used instead so that a valid file is not
 * rejected as unsupported. A file whose extraction fails is reported and
 * skipped; the remaining files are still processed.
 *
 * @param {Event} event - "change" event whose target exposes a `files` list.
 * @returns {Promise<void>} Settles once every selected file has been handled.
 */
async function handleFileUpload(event) {
  const PDF_TYPE = "application/pdf";
  const TXT_TYPE = "text/plain";
  const DOCX_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
  const typeByExtension = new Map([
    ["pdf", PDF_TYPE],
    ["txt", TXT_TYPE],
    ["docx", DOCX_TYPE],
  ]);

  let allText = "";

  for (const file of event.target.files) {
    // File.type is an empty string when the type could not be determined,
    // so fall back to the extension in that case only.
    const extension = String(file.name || "").split(".").pop().toLowerCase();
    const fileType = file.type || typeByExtension.get(extension) || "";

    try {
      if (fileType === PDF_TYPE) {
        allText += await extractTextFromPDF(file);
      } else if (fileType === TXT_TYPE) {
        allText += await extractTextFromTXT(file);
      } else if (fileType === DOCX_TYPE) {
        allText += await extractTextFromDOCX(file);
      } else {
        alert("Archivo no compatible. Solo se aceptan PDF, TXT y DOCX.");
      }
    } catch (error) {
      // One unreadable file must not discard the text of the others.
      console.error(`Text extraction failed for "${file.name}":`, error);
      alert(`No se pudo extraer el texto de "${file.name}".`);
    }
  }

  // Show the extracted text in the textarea.
  document.getElementById("textOutput").value = allText;
}
|
| 49 |
|
| 50 |
+
// Extraer texto de PDF
|
| 51 |
+
/**
 * Extracts the text of every page of a PDF file with pdf.js.
 *
 * The strings of each page's text items are joined with single spaces and
 * every page is terminated with a newline.
 *
 * @param {File|Blob} file - PDF selected by the user.
 * @returns {Promise<string>} Text of all pages, in page order.
 */
async function extractTextFromPDF(file) {
  // pdf.js reads the document through a temporary blob: URL. The URL is
  // revoked in `finally` so it is released even when parsing fails.
  const objectUrl = URL.createObjectURL(file);

  try {
    const pdf = await pdfjsLib.getDocument(objectUrl).promise;
    let text = "";

    // getPage() is 1-based.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      text += content.items.map((item) => item.str).join(" ") + "\n";
    }

    return text;
  } finally {
    URL.revokeObjectURL(objectUrl);
  }
}
|
| 61 |
|
| 62 |
+
// Extraer texto de TXT
|
| 63 |
+
/**
 * Reads a plain-text file and returns its contents as a string.
 *
 * @param {File|Blob} file - Text file selected by the user.
 * @returns {Promise<string>} Resolves with the file contents; rejects with
 *   the reader's error (`reader.error`) when the file cannot be read.
 */
function extractTextFromTXT(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => resolve(reader.result);
    // Reject with the underlying error instead of the bare ProgressEvent, so
    // callers receive something that carries a message.
    reader.onerror = () => reject(reader.error);

    reader.readAsText(file);
  });
}
|
| 75 |
|
| 76 |
+
// Extraer texto de DOCX
|
| 77 |
+
/**
 * Extracts the full text of a DOCX file using PizZip and Docxtemplater
 * (both read from `window`).
 *
 * @param {File|Blob} file - DOCX file selected by the user.
 * @returns {Promise<string>} Resolves with the document text; rejects when
 *   the file cannot be read or cannot be parsed as a DOCX archive.
 */
function extractTextFromDOCX(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      // An exception thrown inside this callback is not caught by the promise
      // executor, so without this try/catch a corrupt file would leave the
      // promise pending forever.
      try {
        const zip = new window.PizZip(reader.result);
        const doc = new window.Docxtemplater(zip);
        resolve(doc.getFullText()); // Extracts all of the text
      } catch (error) {
        reject(error);
      }
    };
    // Reject with the underlying error instead of the bare ProgressEvent.
    reader.onerror = () => reject(reader.error);

    reader.readAsBinaryString(file);
  });
}
|
| 91 |
|
| 92 |
+
// Run the text extraction every time the user changes the file selection.
document
  .querySelector("#fileInput")
  .addEventListener("change", handleFileUpload);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
// Función para hacer la consulta
|
| 96 |
async function askQuestion() {
|
| 97 |
+
const question = document.getElementById("questionInput").value;
|
| 98 |
+
const context = document.getElementById("textOutput").value;
|
| 99 |
|
| 100 |
if (!question || !context) {
|
| 101 |
alert("Por favor, aseg煤rate de que hay texto y una pregunta.");
|
| 102 |
return;
|
| 103 |
}
|
| 104 |
|
| 105 |
+
// Mostrar mensaje de espera si el modelo se está cargando
|
| 106 |
+
const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
|
| 107 |
+
const token = window.huggingface.variables["API_KEY_2"];
|
| 108 |
+
|
| 109 |
+
// Datos a enviar al modelo
|
| 110 |
const data = {
|
| 111 |
+
question: question, // Pregunta como cadena de texto
|
| 112 |
+
context: context // Contexto como cadena de texto
|
|
|
|
|
|
|
| 113 |
};
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
const headers = {
|
| 116 |
"Authorization": `Bearer ${token}`,
|
| 117 |
"Content-Type": "application/json"
|
| 118 |
};
|
| 119 |
|
| 120 |
try {
|
| 121 |
+
// Realizar la consulta a la API de Hugging Face
|
| 122 |
const response = await fetch(modelUrl, {
|
| 123 |
method: 'POST',
|
| 124 |
headers: headers,
|
| 125 |
+
body: JSON.stringify(data) // Convierte los datos a JSON
|
| 126 |
});
|
| 127 |
|
| 128 |
const result = await response.json();
|
| 129 |
+
|
| 130 |
if (response.ok) {
|
| 131 |
+
// Mostrar la respuesta obtenida
|
| 132 |
document.getElementById('response').innerText = result.answer;
|
| 133 |
} else {
|
| 134 |
+
// Mostrar mensaje de error
|
| 135 |
document.getElementById('response').innerText = `Error: ${result.error}`;
|
| 136 |
}
|
| 137 |
} catch (error) {
|
|
|
|
| 140 |
}
|
| 141 |
}
|
| 142 |
</script>
|
| 143 |
+
|
| 144 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
|
| 145 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.20.0/docxtemplater.js"></script>
|
| 146 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pizzip/3.0.6/pizzip.min.js"></script>
|
| 147 |
</body>
|
| 148 |
</html>
|
| 149 |
|