Spaces:
Running
Running
Update test5.html
Browse files- test5.html +88 -112
test5.html
CHANGED
@@ -3,163 +3,135 @@
|
|
3 |
<head>
|
4 |
<meta charset="UTF-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
-
<title>
|
7 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
|
8 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
|
9 |
-
<style>
|
10 |
-
body { font-family: Arial, sans-serif; margin: 20px; }
|
11 |
-
#output { margin-top: 20px; }
|
12 |
-
textarea { width: 100%; height: 300px; }
|
13 |
-
</style>
|
14 |
</head>
|
15 |
<body>
|
16 |
-
|
17 |
-
<
|
18 |
-
|
19 |
-
|
20 |
-
<
|
21 |
-
|
22 |
-
<textarea id="output" placeholder="Texto extra铆do del archivo..."></textarea>
|
23 |
<br><br>
|
24 |
-
|
25 |
-
<
|
26 |
-
<input type="text" id="question" placeholder="Escribe tu pregunta aqu铆...">
|
27 |
<br><br>
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
<
|
30 |
-
|
31 |
-
<div id="response"></div>
|
32 |
|
33 |
<script>
|
34 |
-
// Función para
|
35 |
-
function
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
}
|
38 |
|
39 |
-
//
|
40 |
-
function
|
41 |
-
const
|
42 |
-
|
43 |
-
let
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
-
|
|
|
|
|
46 |
const reader = new FileReader();
|
47 |
-
reader.onload = function(
|
48 |
-
|
49 |
-
|
50 |
-
if (fileType === 'txt') {
|
51 |
-
extractedText += cleanText(e.target.result) + '\n';
|
52 |
-
} else if (fileType === 'pdf') {
|
53 |
-
extractTextFromPDF(e.target.result).then(text => {
|
54 |
-
extractedText += cleanText(text) + '\n';
|
55 |
-
output.value = extractedText;
|
56 |
-
});
|
57 |
-
} else if (fileType === 'docx') {
|
58 |
-
extractTextFromDOCX(e.target.result).then(text => {
|
59 |
-
extractedText += cleanText(text) + '\n';
|
60 |
-
output.value = extractedText;
|
61 |
-
});
|
62 |
-
}
|
63 |
};
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
reader.readAsText(file);
|
69 |
-
}
|
70 |
});
|
71 |
}
|
72 |
|
73 |
-
//
|
74 |
-
function
|
75 |
return new Promise((resolve, reject) => {
|
76 |
-
const
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
});
|
87 |
-
|
88 |
-
if (pageNum < pdf.numPages) {
|
89 |
-
extractPageText(pageNum + 1);
|
90 |
-
} else {
|
91 |
-
resolve(text);
|
92 |
-
}
|
93 |
-
});
|
94 |
-
});
|
95 |
-
}
|
96 |
-
|
97 |
-
extractPageText(pageNumber);
|
98 |
-
}, reject);
|
99 |
});
|
100 |
}
|
101 |
|
102 |
-
//
|
103 |
-
|
104 |
-
return new Promise((resolve, reject) => {
|
105 |
-
JSZip.loadAsync(docxData).then(function(zip) {
|
106 |
-
const xmlFile = zip.file("word/document.xml");
|
107 |
-
if (!xmlFile) {
|
108 |
-
reject("Archivo XML no encontrado.");
|
109 |
-
} else {
|
110 |
-
xmlFile.async("string").then(function(xmlText) {
|
111 |
-
const parser = new DOMParser();
|
112 |
-
const xmlDoc = parser.parseFromString(xmlText, "text/xml");
|
113 |
-
let text = '';
|
114 |
-
const paragraphs = xmlDoc.getElementsByTagName('w:t');
|
115 |
-
|
116 |
-
for (let i = 0; i < paragraphs.length; i++) {
|
117 |
-
text += paragraphs[i].textContent + ' ';
|
118 |
-
}
|
119 |
-
resolve(text);
|
120 |
-
});
|
121 |
-
}
|
122 |
-
});
|
123 |
-
});
|
124 |
-
}
|
125 |
|
126 |
-
// Función para hacer
|
127 |
async function askQuestion() {
|
128 |
-
const question = document.getElementById(
|
129 |
-
const context = document.getElementById(
|
130 |
|
131 |
if (!question || !context) {
|
132 |
alert("Por favor, aseg煤rate de que hay texto y una pregunta.");
|
133 |
return;
|
134 |
}
|
135 |
|
|
|
|
|
|
|
|
|
|
|
136 |
const data = {
|
137 |
-
|
138 |
-
|
139 |
-
context: context
|
140 |
-
}
|
141 |
};
|
142 |
|
143 |
-
const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
|
144 |
-
const cucu = window.huggingface.variables["API_KEY_2"];
|
145 |
-
const token = window.huggingface.variables["API_KEY_2"];
|
146 |
-
console.log("key : " + cucu);
|
147 |
const headers = {
|
148 |
"Authorization": `Bearer ${token}`,
|
149 |
"Content-Type": "application/json"
|
150 |
};
|
151 |
|
152 |
try {
|
|
|
153 |
const response = await fetch(modelUrl, {
|
154 |
method: 'POST',
|
155 |
headers: headers,
|
156 |
-
body: JSON.stringify(data)
|
157 |
});
|
158 |
|
159 |
const result = await response.json();
|
|
|
160 |
if (response.ok) {
|
|
|
161 |
document.getElementById('response').innerText = result.answer;
|
162 |
} else {
|
|
|
163 |
document.getElementById('response').innerText = `Error: ${result.error}`;
|
164 |
}
|
165 |
} catch (error) {
|
@@ -168,6 +140,10 @@
|
|
168 |
}
|
169 |
}
|
170 |
</script>
|
|
|
|
|
|
|
|
|
171 |
</body>
|
172 |
</html>
|
173 |
|
|
|
3 |
<head>
|
4 |
<meta charset="UTF-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Consulta con DistilBERT Multilingual</title>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
</head>
|
8 |
<body>
|
9 |
+
|
10 |
+
<h1>Consulta con DistilBERT Multilingual</h1>
|
11 |
+
|
12 |
+
<!-- Cargar archivo y mostrar texto -->
|
13 |
+
<input type="file" id="fileInput" accept=".txt,.pdf,.doc,.docx" multiple />
|
|
|
|
|
14 |
<br><br>
|
15 |
+
|
16 |
+
<textarea id="textOutput" rows="10" cols="100" placeholder="El texto extraído aparecerá aquí..." readonly></textarea>
|
|
|
17 |
<br><br>
|
18 |
+
|
19 |
+
<!-- Caja de preguntas -->
|
20 |
+
<input type="text" id="questionInput" placeholder="Escribe tu pregunta aquí" />
|
21 |
+
<button onclick="askQuestion()">Hacer Pregunta</button>
|
22 |
|
23 |
+
<h3>Respuesta:</h3>
|
24 |
+
<p id="response"></p>
|
|
|
25 |
|
26 |
<script>
|
27 |
+
// Función para extraer texto de archivos PDF, TXT y DOCX
|
28 |
+
/**
 * Handle a change on the file input: extract the text of every selected
 * file and write the concatenated result into the #textOutput textarea.
 *
 * Files are processed one after another, in selection order. The kind of
 * each file is taken from its MIME type; when the browser reports an empty
 * or unrecognised type, the file-name extension is used instead. A file
 * that is unsupported, or whose extraction throws, triggers an alert and is
 * skipped so the remaining files are still processed.
 *
 * @param {Event} event - "change" event whose target exposes `files`.
 * @returns {Promise<void>} Settles once the textarea has been updated.
 */
async function handleFileUpload(event) {
  const kindByMime = new Map([
    ["application/pdf", "pdf"],
    ["text/plain", "txt"],
    ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"],
  ]);
  const kindByExtension = new Map([
    ["pdf", "pdf"],
    ["txt", "txt"],
    ["docx", "docx"],
  ]);

  let allText = "";

  for (const file of event.target.files) {
    let kind = kindByMime.get(file.type);
    if (!kind) {
      // file.type can be empty or generic, so fall back to the extension.
      const name = String(file.name || "");
      const dot = name.lastIndexOf(".");
      const extension = dot === -1 ? "" : name.slice(dot + 1).toLowerCase();
      kind = kindByExtension.get(extension);
    }

    try {
      if (kind === "pdf") {
        allText += await extractTextFromPDF(file);
      } else if (kind === "txt") {
        allText += await extractTextFromTXT(file);
      } else if (kind === "docx") {
        allText += await extractTextFromDOCX(file);
      } else {
        alert("Archivo no compatible. Solo se aceptan PDF, TXT y DOCX.");
      }
    } catch (err) {
      // One unreadable file must not discard the text of the others.
      console.error(err);
      const reason = (err && err.message) ? err.message : String(err);
      alert(`No se pudo leer "${file.name}": ${reason}`);
    }
  }

  // Show the extracted text in the textarea.
  document.getElementById("textOutput").value = allText;
}
|
49 |
|
50 |
+
// Extraer texto de PDF
|
51 |
+
/**
 * Extract the text of every page of a PDF file using pdf.js.
 *
 * The file is handed to pdf.js through a temporary object URL, which is
 * revoked once extraction finishes or fails so the blob is not kept alive
 * for the lifetime of the page.
 *
 * @param {File|Blob} file - The PDF selected by the user.
 * @returns {Promise<string>} Page texts in order; the items of a page are
 *   joined with a space and each page is terminated by "\n".
 */
async function extractTextFromPDF(file) {
  const objectUrl = URL.createObjectURL(file);
  try {
    const pdf = await pdfjsLib.getDocument(objectUrl).promise;
    let text = "";
    // getPage() is called with 1-based page numbers.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      text += content.items.map((item) => item.str).join(" ") + "\n";
    }
    return text;
  } finally {
    URL.revokeObjectURL(objectUrl);
  }
}
|
61 |
|
62 |
+
// Extraer texto de TXT
|
63 |
+
/**
 * Read a plain-text file and return its contents as a string.
 *
 * @param {File|Blob} file - The text file selected by the user.
 * @returns {Promise<string>} The file contents as produced by
 *   FileReader.readAsText.
 *   Rejects with the reader's error (not the raw progress event) so callers
 *   receive an object that carries a message.
 */
async function extractTextFromTXT(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = function () {
      resolve(reader.result);
    };
    reader.onerror = function () {
      reject(reader.error || new Error("No se pudo leer el archivo de texto."));
    };
    reader.readAsText(file);
  });
}
|
75 |
|
76 |
+
// Extraer texto de DOCX
|
77 |
+
/**
 * Extract the full text of a .docx file using PizZip and docxtemplater.
 *
 * Parsing runs inside the FileReader callback, so any exception thrown
 * there (corrupt archive, library not loaded) is caught and turned into a
 * rejection; otherwise the returned promise would never settle.
 *
 * @param {File|Blob} file - The .docx file selected by the user.
 * @returns {Promise<string>} The text returned by getFullText().
 *   Rejects with the parsing error or with the reader's error.
 */
async function extractTextFromDOCX(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = function () {
      try {
        // NOTE(review): the browser bundle appears to expose the constructor
        // as lowercase `window.docxtemplater`; accept either spelling. Confirm
        // against the loaded build.
        const Templater = window.docxtemplater || window.Docxtemplater;
        const doc = new Templater(new window.PizZip(reader.result));
        resolve(doc.getFullText());
      } catch (err) {
        reject(err);
      }
    };
    reader.onerror = function () {
      reject(reader.error || new Error("No se pudo leer el archivo DOCX."));
    };
    reader.readAsBinaryString(file);
  });
}
|
91 |
|
92 |
+
// Detectar cuando se suben archivos
|
93 |
+
// Run handleFileUpload each time the "change" event fires on #fileInput.
document.getElementById("fileInput").addEventListener("change", handleFileUpload);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
+
// Función para hacer la consulta
|
96 |
async function askQuestion() {
|
97 |
+
const question = document.getElementById("questionInput").value;
|
98 |
+
const context = document.getElementById("textOutput").value;
|
99 |
|
100 |
if (!question || !context) {
|
101 |
alert("Por favor, aseg煤rate de que hay texto y una pregunta.");
|
102 |
return;
|
103 |
}
|
104 |
|
105 |
+
// Mostrar mensaje de espera si el modelo se está cargando
|
106 |
+
const modelUrl = "https://api-inference.huggingface.co/models/distilbert-base-multilingual-cased";
|
107 |
+
const token = window.huggingface.variables["API_KEY_2"];
|
108 |
+
|
109 |
+
// Datos a enviar al modelo
|
110 |
const data = {
|
111 |
+
question: question, // Pregunta como cadena de texto
|
112 |
+
context: context // Contexto como cadena de texto
|
|
|
|
|
113 |
};
|
114 |
|
|
|
|
|
|
|
|
|
115 |
const headers = {
|
116 |
"Authorization": `Bearer ${token}`,
|
117 |
"Content-Type": "application/json"
|
118 |
};
|
119 |
|
120 |
try {
|
121 |
+
// Realizar la consulta a la API de Hugging Face
|
122 |
const response = await fetch(modelUrl, {
|
123 |
method: 'POST',
|
124 |
headers: headers,
|
125 |
+
body: JSON.stringify(data) // Convierte los datos a JSON
|
126 |
});
|
127 |
|
128 |
const result = await response.json();
|
129 |
+
|
130 |
if (response.ok) {
|
131 |
+
// Mostrar la respuesta obtenida
|
132 |
document.getElementById('response').innerText = result.answer;
|
133 |
} else {
|
134 |
+
// Mostrar mensaje de error
|
135 |
document.getElementById('response').innerText = `Error: ${result.error}`;
|
136 |
}
|
137 |
} catch (error) {
|
|
|
140 |
}
|
141 |
}
|
142 |
</script>
|
143 |
+
|
144 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
|
145 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.20.0/docxtemplater.js"></script>
|
146 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pizzip/3.0.6/pizzip.min.js"></script>
|
147 |
</body>
|
148 |
</html>
|
149 |
|