// Page-header residue captured together with this file (not part of the script):
// guipenedo's picture | guipenedo (HF staff) | updated | 0741edf (unverified)
document.addEventListener("DOMContentLoaded", function() {
// --- DOM handles, looked up once when the document is ready ---
// Drop-down whose value names the JSON file to load (see loadLanguageFamily).
const select = document.getElementById('language-family-select');
// Floating tooltip shown while hovering a tree node.
const hoverBox = document.getElementById('hover-box');
// Text input whose value is passed to searchNode().
const searchBar = document.getElementById('search-bar');
// Depth control; its value is read by drawVisibleNodes().
const depthSelector = document.getElementById('depth-number');
// Containers that receive one checkbox per tokenizer / script name.
const tokenizerFilterDiv = document.getElementById('tokenizer-filter');
const scriptFiltersDiv = document.getElementById('script-filter');
// Checkbox toggling language nodes that have no scripts listed.
const showNonGlotlidCheckbox = document.getElementById('show-non-glotlid');
// Buttons that uncheck every box in the corresponding filter container.
const clearTokenizersFilter = document.getElementById('clear-tokenizers');
const clearScriptsFilter = document.getElementById('clear-scripts');
// --- Mutable view state shared by the functions below ---
// Id of the node the next redraw should centre on; null when there is none.
let nodeToCenter = null;
// Tree returned by addNodeIds() for the currently loaded family; null until loaded.
let currentTreeData = null;
// Set of node ids that are expanded; (re)created by loadLanguageFamily().
let expandedNodes;
let currentTransform = d3.zoomIdentity; // Last zoom/pan transform, reapplied on every redraw
// Unique tokenizer names found in the loaded tree (filled by updateTokenizerFilter).
let tokenizerNames;
// Unique script names found in the loaded tree (filled by updateScriptFilter).
let scriptNames;
// d3 ordinal scale mapping a tokenizer name to a colour (built by updateTokenizerFilter).
let color;
/**
 * Records whether a tree node is expanded.
 *
 * @param {number} id - Id of the node to update.
 * @param {boolean} [state=true] - Truthy to mark the node expanded, falsy to collapse it.
 */
function setExpanded(id, state = true) {
    if (!state) {
        // Removing an id that is not in the set is a harmless no-op.
        expandedNodes.delete(id);
        return;
    }
    expandedNodes.add(id);
}
/**
 * Returns a copy of the tree in which every node carries:
 *   - `id`: a sequential number assigned in pre-order (the root gets 0);
 *   - `subtreeSize`: the number of leaves below the node (a leaf counts as 1);
 *   - `children`: always an array, empty for leaves.
 *
 * The input tree is not modified. A node without a `children` property is
 * treated as a leaf.
 *
 * @param {Object} tree - Root node; children are read from `tree.children`.
 * @returns {Object} The annotated copy of the tree.
 */
function addNodeIds(tree) {
    let nextId = 0;

    function annotate(node) {
        // Take the id before descending so numbering is pre-order.
        const id = nextId++;
        const children = (node.children || []).map(child => annotate(child));
        const subtreeSize = children.length === 0
            ? 1
            : children.reduce((total, child) => total + child.subtreeSize, 0);
        return { ...node, id, children, subtreeSize };
    }

    return annotate(tree);
}
/**
 * Fetches `data/<family>.json` for the family currently chosen in `select`,
 * stores the annotated tree, rebuilds both filter panels and redraws the
 * tree recentred.
 *
 * Failures (network error, non-2xx status, invalid JSON) are logged and leave
 * the current view untouched. A response that arrives after the user has
 * already picked another family is discarded.
 *
 * @returns {Promise<void>} Settles once the load has been handled; never rejects.
 */
function loadLanguageFamily() {
    const family = select.value;
    const url = `data/${family}.json`;
    return fetch(url)
        .then(response => {
            // fetch() only rejects on network errors, so HTTP errors must be checked by hand.
            if (!response.ok) {
                throw new Error(`Request for ${url} failed with HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            // Ignore stale responses: a newer selection has its own request in flight.
            if (select.value !== family) {
                return;
            }
            currentTreeData = addNodeIds(data);
            // Only the root (id 0) starts expanded.
            expandedNodes = new Set([0]);
            updateTokenizerFilter(data);
            updateScriptFilter(data);
            drawVisibleNodes(true);
        })
        .catch(error => {
            console.error(`Could not load language family "${family}":`, error);
        });
}
// Load the initially selected family right away, then reload on every change.
loadLanguageFamily();
select.addEventListener('change', loadLanguageFamily);

// Typing in the search bar looks the text up in the tree.
searchBar.addEventListener('input', () => {
    searchNode(searchBar.value);
});

// Changing the depth or the "show non-GlotLID" toggle only needs a redraw.
depthSelector.addEventListener('change', () => {
    drawVisibleNodes();
});
showNonGlotlidCheckbox.addEventListener('click', () => {
    drawVisibleNodes();
});

// "Clear" buttons: untick every checked box in the panel, then redraw.
clearTokenizersFilter.addEventListener('click', () => {
    for (const box of tokenizerFilterDiv.querySelectorAll('input:checked')) {
        box.checked = false;
    }
    drawVisibleNodes();
});
clearScriptsFilter.addEventListener('click', () => {
    for (const box of scriptFiltersDiv.querySelectorAll('input:checked')) {
        box.checked = false;
    }
    drawVisibleNodes();
});
/**
 * Collects every script name that appears in `node` or any of its descendants.
 *
 * The result always contains the placeholder 'x', which the filtering code
 * uses for nodes that list no scripts.
 *
 * @param {Object|null|undefined} node - Subtree root; scripts are read from
 *     `node.scripts`, descendants from `node.children`. Either may be absent.
 * @param {Set<string>} [namesSet] - Accumulator, filled in place.
 * @returns {Set<string>} The accumulator.
 */
function getScriptNames(node, namesSet = new Set(['x'])) {
    if (!node) {
        return namesSet;
    }
    for (const script of node.scripts || []) {
        namesSet.add(script);
    }
    for (const child of node.children || []) {
        getScriptNames(child, namesSet);
    }
    return namesSet;
}
/**
 * Rebuilds the script filter panel for a freshly loaded tree.
 *
 * Stores the unique script names in `scriptNames`, empties the panel and adds
 * one pre-checked checkbox (wrapped in a label, followed by a line break) per
 * name. Toggling any checkbox redraws the tree.
 *
 * @param {Object} data - Root node of the loaded tree.
 */
function updateScriptFilter(data) {
    scriptNames = Array.from(getScriptNames(data));
    scriptFiltersDiv.innerHTML = '';

    for (const scriptName of scriptNames) {
        const box = document.createElement('input');
        Object.assign(box, { type: 'checkbox', value: scriptName, checked: true });
        box.addEventListener('change', () => drawVisibleNodes());

        const row = document.createElement('label');
        row.appendChild(box);
        row.appendChild(document.createTextNode(scriptName));

        scriptFiltersDiv.appendChild(row);
        scriptFiltersDiv.appendChild(document.createElement('br'));
    }
}
/**
 * Rebuilds the tokenizer filter panel for a freshly loaded tree.
 *
 * Stores the unique tokenizer names in `tokenizerNames`, builds the shared
 * `color` scale over them, empties the panel and adds one row per name:
 * a small colour swatch, a pre-checked checkbox and the name itself, followed
 * by a line break. Toggling any checkbox redraws the tree.
 *
 * @param {Object} data - Root node of the loaded tree.
 */
function updateTokenizerFilter(data) {
    tokenizerNames = Array.from(getTokenizerNames(data));
    // One colour per tokenizer name, taken from d3's 10-colour categorical scheme.
    color = d3.scaleOrdinal(tokenizerNames, d3.schemeCategory10);
    tokenizerFilterDiv.innerHTML = '';

    for (const tokenizerName of tokenizerNames) {
        const box = document.createElement('input');
        Object.assign(box, { type: 'checkbox', value: tokenizerName, checked: true });
        box.addEventListener('change', () => drawVisibleNodes());

        // Swatch showing the colour used for this tokenizer in the tree.
        const swatch = document.createElement('span');
        Object.assign(swatch.style, {
            display: 'inline-block',
            width: '10px',
            height: '10px',
            backgroundColor: color(tokenizerName),
            marginRight: '5px',
        });

        const row = document.createElement('label');
        // Order inside the row: swatch, checkbox, text.
        row.appendChild(swatch);
        row.appendChild(box);
        row.appendChild(document.createTextNode(tokenizerName));

        tokenizerFilterDiv.appendChild(row);
        tokenizerFilterDiv.appendChild(document.createElement('br'));
    }
}
/**
 * Collects the `original_lang_name` of every tokenizer that appears in `node`
 * or any of its descendants.
 *
 * The result always contains the placeholder 'x', which the filtering code
 * uses for nodes that have no tokenizers.
 *
 * @param {Object|null|undefined} node - Subtree root; `node.tokenizers` is an
 *     object whose values carry `original_lang_name`. It and `node.children`
 *     may be absent.
 * @param {Set<string>} [namesSet] - Accumulator, filled in place.
 * @returns {Set<string>} The accumulator.
 */
function getTokenizerNames(node, namesSet = new Set(['x'])) {
    if (!node) {
        return namesSet;
    }
    // Only the tokenizer records matter here; their keys are not needed.
    for (const tokenizer of Object.values(node.tokenizers || {})) {
        namesSet.add(tokenizer.original_lang_name);
    }
    for (const child of node.children || []) {
        getTokenizerNames(child, namesSet);
    }
    return namesSet;
}
/**
 * Draws the given (already filtered) tree into `#tree-container`.
 *
 * Any previous SVG is removed first. Nodes are laid out with `d3.tree()` and
 * drawn top-down (children below their parent). Each node gets:
 *   - a circle when it has no `iso_3_code`, sized by `subtreeSize`;
 *   - a square when it has an `iso_3_code` and no `native_tokenizers`;
 *   - a triangle when `native_tokenizers` is non-empty;
 *   - a text label, a hover tooltip and a click handler toggling expansion.
 * Shapes are coloured by the first tokenizer of the node that passes both the
 * script and the tokenizer filter ('unknown' when none does).
 *
 * The zoom/pan transform is kept across redraws via `currentTransform`; when
 * `nodeToCenter` holds a node id the view is centred on that node instead and
 * `nodeToCenter` is reset to null.
 *
 * @param {Object|null} data - Root of the tree to draw; nothing is drawn when null.
 * @param {boolean} [recenter=false] - Reset the view so the root sits at the
 *     centre of the window.
 */
function createTree(data, recenter = false) {
    // Clear any existing tree
    d3.select("#tree-container svg").remove();
    // The filter can remove the root itself; then there is nothing to draw.
    if (!data) {
        return;
    }

    // Set the SVG dimensions to fill the entire window
    const width = window.innerWidth;
    const height = window.innerHeight;

    // Create the SVG element
    const svg = d3.select("#tree-container")
        .append("svg")
        .attr("width", width)
        .attr("height", height);
    const g = svg.append("g");

    // Define zoom behavior without restricting translation extents
    const zoom = d3.zoom()
        .scaleExtent([0.1, 5])
        .on("zoom", function(event) {
            currentTransform = event.transform; // Remember the transform for the next redraw
            g.attr("transform", event.transform);
        });

    // Create a root hierarchy node
    const root = d3.hierarchy(data);
    // nodeSize makes the layout grow with the tree instead of squeezing it into a fixed box
    const treeLayout = d3.tree().nodeSize([200, 100]);
    treeLayout(root);

    // Create links between nodes (top-down: parent above children)
    g.selectAll('.link')
        .data(root.links())
        .enter()
        .append('path')
        .attr('class', 'link')
        .attr('d', d3.linkVertical()
            .x(d => d.x)
            .y(d => d.y))
        .attr('stroke', '#ccc')
        .attr('fill', 'none');

    // Create nodes
    const node = g.selectAll('.node')
        .data(root.descendants())
        .enter()
        .append('g')
        .attr('class', 'node')
        .attr('transform', d => `translate(${d.x},${d.y})`);

    // Circle radius grows with the square root of the subtree size
    const sizeScale = d3.scaleSqrt()
        .domain([1, root.data.subtreeSize])
        .range([5, 20]); // Minimum and maximum circle radius

    const selectedTokenizers = new Set(
        Array.from(tokenizerFilterDiv.querySelectorAll('input:checked')).map(input => input.value));
    const selectedScripts = new Set(
        Array.from(scriptFiltersDiv.querySelectorAll('input:checked')).map(input => input.value));

    // Name of the first tokenizer of `nodeData` that passes both filters, or 'unknown'.
    function getColorTokenizer(nodeData) {
        const match = Object.entries(nodeData.tokenizers).find(([script, tokenizer]) =>
            selectedScripts.has(script) && selectedTokenizers.has(tokenizer.original_lang_name));
        return match ? match[1].original_lang_name : 'unknown';
    }
    const fillByTokenizer = d => color(getColorTokenizer(d.data));

    // Circles for nodes without an ISO 639-3 code
    node.append('circle')
        .filter(d => !d.data.iso_3_code)
        .attr('r', d => sizeScale(d.data.subtreeSize))
        .attr('fill', fillByTokenizer);

    // Squares for nodes with an ISO 639-3 code and no native tokenizers
    node.filter(d => d.data.iso_3_code && d.data.native_tokenizers.length === 0)
        .append('rect')
        .attr('width', 10)
        .attr('height', 10)
        .attr('x', -5)
        .attr('y', -5)
        .attr('fill', fillByTokenizer);

    // Triangles for nodes that list native tokenizers
    node.filter(d => d.data.native_tokenizers.length !== 0)
        .append('path')
        .attr('d', d3.symbol().type(d3.symbolTriangle).size(100)) // Adjust size as needed
        .attr('fill', fillByTokenizer);

    // Add text labels to nodes
    node.append('text')
        .attr('dy', 4)
        .attr('x', d => sizeScale(d.data.subtreeSize) + 4)
        .attr('text-anchor', 'start')
        .text(d => {
            const tokenizerName = getColorTokenizer(d.data);
            return `${d.data.name} - ${tokenizerName || 'x'}${d.data.iso_3_code ? '' : ' (' + d.data.subtreeSize + ')'}`;
        });

    if (!currentTransform || recenter) {
        currentTransform = d3.zoomIdentity.translate(width / 2, height / 2);
    }

    // Compare against null/undefined explicitly: the root has id 0, which is falsy.
    if (nodeToCenter != null) {
        const element = root.descendants().find(d => d.data.id === nodeToCenter);
        if (element) {
            const scale = 1.0; // Adjust scale as needed
            currentTransform = d3.zoomIdentity
                .translate(window.innerWidth / 2 - element.x, window.innerHeight / 2 - element.y)
                .scale(scale);
        }
        nodeToCenter = null;
    }

    // Attach the zoom behaviour once, then restore the saved (or freshly computed) transform.
    svg.call(zoom)
        .call(zoom.transform, d3.zoomIdentity.translate(currentTransform.x, currentTransform.y).scale(currentTransform.k));

    // Tooltip markers, written as escapes so they survive re-encoding of the file:
    // U+1F440 (eyes) when the script is listed in native_tokenizers, U+1F916 (robot) otherwise.
    const nativeMark = '(\u{1F440})';
    const otherMark = '(\u{1F916})';

    // Add hover events to nodes for the hover-box
    node.on("mouseover", function(event, d) {
        hoverBox.style.display = "block";
        hoverBox.style.left = (event.pageX) + "px";
        hoverBox.style.top = (event.pageY) + "px";
        const tokenizersList = Object.keys(d.data.tokenizers).map((script) =>
            `<li><em>${script}</em>: ${d.data.tokenizers[script]['class_name']}-${d.data.tokenizers[script]['original_lang_name']}${d.data.native_tokenizers.includes(script) ? nativeMark : otherMark}</li>`).join('');
        // NOTE(review): values are interpolated into HTML unescaped; this assumes the JSON data is trusted.
        hoverBox.innerHTML = `
<strong>Name:</strong> ${d.data.name}<br>
${d.data.iso_1_code ? '<strong>ISO 1 Code:</strong> ' + d.data.iso_1_code + '<br>' : ''}
${d.data.iso_3_code ? '<strong>ISO 3 Code:</strong> ' + d.data.iso_3_code + '<br>' : ''}
${d.data.scripts.length > 0 ? '<strong>Scripts:</strong> ' + d.data.scripts.join(', ') + '<br>' : ''}
${d.data.iso_3_code ? '<strong>In GlotLID:</strong> ' + (d.data.scripts.length > 0 ? 'YES' : 'NO') + '<br>' : ''}
<strong>Tokenizers:</strong><ul>${tokenizersList}</ul>
<strong>Subtree size:</strong> ${d.data.subtreeSize}
`;
    }).on("mousemove", function(event) {
        hoverBox.style.left = (event.pageX) + "px";
        hoverBox.style.top = (event.pageY) + "px";
    }).on("mouseout", function() {
        hoverBox.style.display = "none";
    });

    // Clicking a node toggles its expanded state and redraws
    node.on('click', function(event, d) {
        setExpanded(d.data.id, !expandedNodes.has(d.data.id));
        hoverBox.style.display = "none";
        drawVisibleNodes();
    });
}
/**
 * Filters the loaded tree according to the UI controls and hands the result
 * to createTree().
 *
 * A node is dropped when:
 *   - it has an `iso_3_code`, lists no scripts, and "show non-GlotLID" is off; or
 *   - its parent is collapsed and it lies deeper than the selected depth
 *     (a depth of 0 means "no limit"); or
 *   - its parent is collapsed, none of its children survived, it is not the
 *     root (id 0), and it is not a leaf matching the tokenizer/script filters.
 * Children of an expanded parent are otherwise kept.
 *
 * Does nothing until a tree has been loaded.
 *
 * @param {boolean} [recenter=false] - Forwarded to createTree().
 */
function drawVisibleNodes(recenter = false) {
    if (!currentTreeData) return;

    const checkedValues = container =>
        Array.from(container.querySelectorAll('input:checked'), input => input.value);
    // Sets give constant-time membership tests while walking the tree.
    const selectedTokenizers = new Set(checkedValues(tokenizerFilterDiv));
    const selectedScripts = new Set(checkedValues(scriptFiltersDiv));
    const showNonGlotlid = showNonGlotlidCheckbox.checked;
    // The control yields a string; convert once instead of relying on coercion in comparisons.
    const maxDepth = Number(depthSelector.value);
    const isWithinDepth = depth => maxDepth === 0 || depth <= maxDepth;

    // True when the node carries a selected tokenizer AND a selected script.
    // Nodes without tokenizers / scripts are represented by the placeholder 'x'.
    function matchesFilters(node) {
        const scripts = node.scripts || [];
        if (scripts.length === 0 && !showNonGlotlid) {
            return false;
        }
        const tokenizerLabels = Object.values(node.tokenizers || {}).map(tok => tok.original_lang_name);
        const tokenizerKeys = tokenizerLabels.length > 0 ? tokenizerLabels : ['x'];
        const scriptKeys = scripts.length > 0 ? scripts : ['x'];
        return tokenizerKeys.some(label => selectedTokenizers.has(label)) &&
            scriptKeys.some(script => selectedScripts.has(script));
    }

    // Returns a filtered copy of `node`, or null when the node must be hidden.
    function filterHierarchy(node, parentExpanded = true, depth = 0) {
        const scripts = node.scripts || [];
        const isHiddenNonGlotlid = node.iso_3_code && scripts.length === 0 && !showNonGlotlid;
        if (isHiddenNonGlotlid || (!parentExpanded && !isWithinDepth(depth))) {
            return null;
        }

        const ownChildren = node.children || [];
        const isExpanded = expandedNodes.has(node.id);
        const filteredChildren = ownChildren
            .map(child => filterHierarchy(child, isExpanded, depth + 1))
            .filter(child => child !== null);

        // Under a collapsed parent, keep only branches leading to a matching leaf (and the root).
        if (!parentExpanded && filteredChildren.length === 0 && node.id !== 0) {
            const isMatchingLeaf = ownChildren.length === 0 && matchesFilters(node);
            if (!isMatchingLeaf) {
                return null;
            }
        }

        return {
            ...node,
            children: filteredChildren
        };
    }

    const filteredData = filterHierarchy(currentTreeData);
    createTree(filteredData, recenter);
}
/**
 * Finds the first node (depth-first, pre-order) whose name, ISO 639-3 code or
 * ISO 639-1 code equals the query, expands all of its ancestors, and redraws
 * the tree centred on it.
 *
 * Matching ignores case and surrounding whitespace. Queries shorter than two
 * characters are ignored, as are calls made before a tree has been loaded.
 * Nothing is redrawn when no node matches.
 *
 * @param {string} name - Text typed by the user.
 */
function searchNode(name) {
    const query = name.trim().toLowerCase();
    if (!currentTreeData || query.length < 2) return;

    // Tolerates missing fields: only string values can match.
    const matchesQuery = value => typeof value === 'string' && value.toLowerCase() === query;

    // Returns the matching node, marking every ancestor on the way back up as expanded.
    function expandNode(node) {
        if (matchesQuery(node.name) || matchesQuery(node.iso_3_code) || matchesQuery(node.iso_1_code)) {
            return node;
        }
        for (const child of (node.children || [])) {
            const found = expandNode(child);
            if (found) {
                setExpanded(node.id);
                return found;
            }
        }
        return null;
    }

    const expandedNode = expandNode(currentTreeData);
    if (expandedNode) {
        nodeToCenter = expandedNode.id;
        drawVisibleNodes();
    }
}
});