Spaces:
Running
Running
thibaud frere
committed on
Commit
·
a551721
1
Parent(s):
3aae690
add comparison chart
Browse files- app/src/content/article.mdx +2 -1
- app/src/content/assets/data/comparison/id_1_rank_1_sim_1.000.png +3 -0
- app/src/content/assets/data/comparison/id_1_rank_2_sim_0.165.png +3 -0
- app/src/content/assets/data/comparison/id_1_rank_3_sim_0.143.png +3 -0
- app/src/content/assets/data/comparison/id_2_rank_1_sim_1.000.png +3 -0
- app/src/content/assets/data/comparison/id_2_rank_2_sim_0.978.png +3 -0
- app/src/content/assets/data/comparison/id_2_rank_3_sim_0.975.png +3 -0
- app/src/content/assets/data/comparison/id_3_rank_1_sim_0.936.png +3 -0
- app/src/content/assets/data/comparison/id_3_rank_2_sim_0.686.png +3 -0
- app/src/content/assets/data/comparison/id_3_rank_3_sim_0.676.png +3 -0
- app/src/content/embeds/comparison.html +126 -0
app/src/content/article.mdx
CHANGED
|
@@ -268,7 +268,8 @@ Compared against existing VLM training datasets, FineVision produces significant
|
|
| 268 |
### How contaminated are the datasets?
|
| 269 |
To investigate data leakage from benchmarks into this dataset, we construct a deduplication pipeline based on the sample images. We embed the images of 66 image-test datasets from the lmms-eval framework using the SSCD descriptor, and compute the cosine similarity between our samples and the test-set embeddings. Whenever a sample has a similarity higher than a threshold of 0.95, it is assumed to be a duplicate. While our tests with various thresholds show that this flags some samples that are not actual duplicates (especially when images look similar but differ in detail, like graphs or tables), we preferred to err on the side of caution. We open-source the deduplication pipeline here as well as the precomputed test-set embeddings here.
|
| 270 |
|
| 271 |
-
|
|
|
|
| 272 |
|
| 273 |
| Name | Samples | Contamination Rate | Performance Drop |
|
| 274 |
|---------------|---------|--------------------|------------------|
|
|
|
|
| 268 |
### How contaminated are the datasets?
|
| 269 |
To investigate data leakage from benchmarks into this dataset, we construct a deduplication pipeline based on the sample images. We embed the images of 66 image-test datasets from the lmms-eval framework using the SSCD descriptor, and compute the cosine similarity between our samples and the test-set embeddings. Whenever a sample has a similarity higher than a threshold of 0.95, it is assumed to be a duplicate. While our tests with various thresholds show that this flags some samples that are not actual duplicates (especially when images look similar but differ in detail, like graphs or tables), we preferred to err on the side of caution. We open-source the deduplication pipeline here as well as the precomputed test-set embeddings here.
|
| 270 |
|
| 271 |
+
<HtmlEmbed src="comparison.html" desc="desc" title="title"/>
|
| 272 |
+
|
| 273 |
|
| 274 |
| Name | Samples | Contamination Rate | Performance Drop |
|
| 275 |
|---------------|---------|--------------------|------------------|
|
app/src/content/assets/data/comparison/id_1_rank_1_sim_1.000.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_1_rank_2_sim_0.165.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_1_rank_3_sim_0.143.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_2_rank_1_sim_1.000.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_2_rank_2_sim_0.978.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_2_rank_3_sim_0.975.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_3_rank_1_sim_0.936.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_3_rank_2_sim_0.686.png
ADDED
|
Git LFS Details
|
app/src/content/assets/data/comparison/id_3_rank_3_sim_0.676.png
ADDED
|
Git LFS Details
|
app/src/content/embeds/comparison.html
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="image-comparison" style="width:100%;margin:10px 0;"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.image-comparison { position: relative; }
|
| 4 |
+
.image-comparison .controls { display:flex; align-items:center; gap:16px; justify-content:flex-start; margin:12px 0; }
|
| 5 |
+
.image-comparison .controls label { font-size:12px; color: var(--muted-color); display:flex; align-items:center; gap:8px; }
|
| 6 |
+
.image-comparison .controls select {
|
| 7 |
+
font-size: 12px;
|
| 8 |
+
padding: 8px 28px 8px 10px;
|
| 9 |
+
border: 1px solid var(--border-color);
|
| 10 |
+
border-radius: 8px;
|
| 11 |
+
background-color: var(--surface-bg);
|
| 12 |
+
color: var(--text-color);
|
| 13 |
+
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 24 24' fill='none' stroke='%230f1115' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'/%3E%3C/svg%3E");
|
| 14 |
+
background-repeat: no-repeat;
|
| 15 |
+
background-position: right 8px center;
|
| 16 |
+
background-size: 12px;
|
| 17 |
+
-webkit-appearance: none; appearance: none; cursor: pointer;
|
| 18 |
+
transition: border-color .15s ease, box-shadow .15s ease;
|
| 19 |
+
}
|
| 20 |
+
[data-theme="dark"] .image-comparison .controls select {
|
| 21 |
+
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 24 24' fill='none' stroke='%23ffffff' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'/%3E%3C/svg%3E");
|
| 22 |
+
}
|
| 23 |
+
.image-comparison .controls select:hover { border-color: var(--primary-color); }
|
| 24 |
+
.image-comparison .controls select:focus { border-color: var(--primary-color); box-shadow: 0 0 0 3px rgba(232,137,171,.25); outline: none; }
|
| 25 |
+
|
| 26 |
+
.image-comparison .grid { display:grid; grid-template-columns: repeat(4, 1fr); gap: 12px; width:100%; align-items: start; }
|
| 27 |
+
@media (max-width: 980px) { .image-comparison .grid { grid-template-columns: repeat(2, 1fr); } }
|
| 28 |
+
|
| 29 |
+
.image-comparison .card { position: relative; border:1px solid var(--border-color); border-radius:10px; overflow:hidden; background: var(--surface-bg); display:flex; flex-direction:column; }
|
| 30 |
+
.image-comparison .card .media { position: relative; width:100%; height: 200px; background: var(--surface-2, var(--surface-bg)); display:block; }
|
| 31 |
+
.image-comparison .card .media img { width:100%; height:100%; object-fit: contain; display:block; }
|
| 32 |
+
.image-comparison .badge { position:absolute; top:8px; left:8px; font-size:11px; padding:3px 6px; border-radius:6px; background: var(--surface-bg); color: var(--text-color); border:1px solid var(--border-color); }
|
| 33 |
+
/* Caption strip under each image. Uses flex-start rather than start: the bare
   `start` keyword has mixed support on flex containers, and the two are
   equivalent for this non-reversed row layout. */
.image-comparison .meta { padding:8px 10px; border-top:1px solid var(--border-color); font-size:12px; display:flex; height: 55px; align-items:flex-start; justify-content:space-between; gap:8px; }
|
| 34 |
+
.image-comparison .meta .label { color: var(--muted-color); }
|
| 35 |
+
.image-comparison .meta .value { font-weight:600; }
|
| 36 |
+
</style>
|
| 37 |
+
<script>
|
| 38 |
+
(() => {
|
| 39 |
+
const bootstrap = () => {
|
| 40 |
+
const mount = document.currentScript ? document.currentScript.previousElementSibling : null;
|
| 41 |
+
const container = (mount && mount.querySelector && mount.querySelector('.image-comparison')) || document.querySelector('.image-comparison');
|
| 42 |
+
if (!container) return;
|
| 43 |
+
if (container.dataset && container.dataset.mounted === 'true') return; if (container.dataset) container.dataset.mounted = 'true';
|
| 44 |
+
|
| 45 |
+
// Known filenames in /public/data/comparison
|
| 46 |
+
const FILES = {
|
| 47 |
+
'1': { 1: 'id_1_rank_1_sim_1.000.png', 2: 'id_1_rank_2_sim_0.165.png', 3: 'id_1_rank_3_sim_0.143.png' },
|
| 48 |
+
'2': { 1: 'id_2_rank_1_sim_1.000.png', 2: 'id_2_rank_2_sim_0.978.png', 3: 'id_2_rank_3_sim_0.975.png' },
|
| 49 |
+
'3': { 1: 'id_3_rank_1_sim_0.936.png', 2: 'id_3_rank_2_sim_0.686.png', 3: 'id_3_rank_3_sim_0.676.png' },
|
| 50 |
+
};
|
| 51 |
+
|
| 52 |
+
// Images served from [domain]/public/data/comparison/*.png → path is /data/comparison/
|
| 53 |
+
const CANDIDATE_BASES = [ '/data/comparison/' ];
|
| 54 |
+
|
| 55 |
+
/**
 * Probe candidate base paths by loading `base + filename` as an image.
 *
 * @param {string[]} candidates - Base paths to try, in order.
 * @param {string} filename - File name appended to each base for the probe.
 * @returns {Promise<string>} Resolves with the first base whose image loads;
 *   when every probe errors, resolves with `candidates[0]`. Never rejects.
 */
const resolveBase = (candidates, filename) => new Promise((resolve) => {
  const probe = (position) => {
    if (position >= candidates.length) {
      // Nothing loaded: fall back to the first candidate.
      resolve(candidates[0]);
      return;
    }
    const base = candidates[position];
    const tester = new Image();
    tester.onload = () => resolve(base);
    tester.onerror = () => probe(position + 1);
    // Handlers are attached before src is set so no event can be missed.
    tester.src = base + filename;
  };
  probe(0);
});
|
| 64 |
+
|
| 65 |
+
// Controls
|
| 66 |
+
const EXAMPLE_LABELS = { '1': 'photo', '2': 'chart', '3': 'drawing' };

const controls = document.createElement('div');
controls.className = 'controls';

const label = document.createElement('label');
label.textContent = 'Example';

const select = document.createElement('select');
// One <option> per example id; the option value is the id used to look up files.
for (const exampleId of ['1', '2', '3']) {
  const option = document.createElement('option');
  option.value = exampleId;
  option.textContent = EXAMPLE_LABELS[exampleId];
  select.appendChild(option);
}

label.appendChild(select);
controls.appendChild(label);
container.appendChild(controls);
|
| 72 |
+
|
| 73 |
+
// Grid
|
| 74 |
+
const grid = document.createElement('div'); grid.className = 'grid'; container.appendChild(grid);
|
| 75 |
+
|
| 76 |
+
let basePath = CANDIDATE_BASES[0];
|
| 77 |
+
|
| 78 |
+
/**
 * Extract the rank and similarity score encoded in a comparison file name
 * such as `id_1_rank_2_sim_0.165.png`.
 *
 * @param {string} filename - File name to inspect.
 * @returns {{ rank: string, sim: string }} The captured values as strings;
 *   each is '' when the corresponding `rank_` / `sim_` part is absent.
 */
const parseInfo = (filename) => {
  const rankMatch = filename.match(/rank_(\d+)/i);
  // Digits with an optional decimal part: a bare [0-9.]+ would also swallow
  // the dot that precedes the file extension ("0.165." instead of "0.165").
  const simMatch = filename.match(/sim_(\d+(?:\.\d+)?)/i);
  return {
    rank: rankMatch ? rankMatch[1] : '',
    sim: simMatch ? simMatch[1] : '',
  };
};
|
| 83 |
+
|
| 84 |
+
/**
 * Turn a similarity value into display text.
 *
 * @param {*} val - Similarity value, typically a string parsed from a file name.
 * @returns {string} An em dash when `val` is null, undefined or '';
 *   otherwise the value as a string with a single trailing dot removed.
 */
const formatSim = (val) => {
  const isMissing = val === null || val === undefined || val === '';
  if (isMissing) {
    return '—';
  }
  const text = String(val);
  return text.replace(/\.$/, '');
};
|
| 88 |
+
|
| 89 |
+
/**
 * Rebuild the grid for one example.
 *
 * Four cards are produced: the rank-1 file captioned "Query" with no score,
 * the same rank-1 file again with the fixed text "1.000 identical", then the
 * rank-2 and rank-3 files with the similarity parsed from their file names.
 * Does nothing when `id` has no entry in FILES.
 *
 * NOTE(review): for example '3' the rank-1 file name encodes sim 0.936, yet
 * the second card is still captioned "1.000 identical" — confirm intended.
 *
 * @param {string} id - Key into FILES selecting the example to show.
 */
const render = (id) => {
  const files = FILES[id];
  if (!files) return;

  // rank_1 twice then rank_2 and rank_3
  const sequence = [files[1], files[1], files[2], files[3]];
  grid.innerHTML = '';

  for (const [position, fname] of sequence.entries()) {
    const { sim } = parseInfo(fname);
    const isQuery = position === 0;
    const isRepeatOfQuery = position === 1;

    const picture = document.createElement('img');
    picture.alt = `example ${id} image ${position+1}${isRepeatOfQuery ? ' identical' : ''}`;
    picture.loading = 'lazy';
    picture.src = basePath + fname;

    const media = document.createElement('div');
    media.className = 'media';
    media.appendChild(picture);

    const caption = document.createElement('span');
    caption.className = 'label';
    caption.textContent = isQuery ? 'Query' : 'Similarity';

    const meta = document.createElement('div');
    meta.className = 'meta';
    meta.appendChild(caption);

    // The query card carries no score; every other card gets one.
    if (!isQuery) {
      const score = document.createElement('span');
      score.className = 'value';
      score.textContent = isRepeatOfQuery ? '1.000 identical' : formatSim(sim);
      meta.appendChild(score);
    }

    const card = document.createElement('div');
    card.className = 'card';
    card.appendChild(media);
    card.appendChild(meta);
    grid.appendChild(card);
  }
};
|
| 110 |
+
|
| 111 |
+
(async () => {
  // Resolve a working base path before the first full render.
  basePath = await resolveBase(CANDIDATE_BASES, FILES['1'][1]);
  // Render whatever the dropdown shows now rather than a hard-coded '1': the
  // controls are already interactive while the probe is pending, so the user
  // may have picked another example in the meantime.
  render(select.value);
})();
|
| 116 |
+
|
| 117 |
+
select.addEventListener('change', () => render(select.value));
|
| 118 |
+
};
|
| 119 |
+
|
| 120 |
+
if (document.readyState === 'loading') {
|
| 121 |
+
document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
|
| 122 |
+
} else { bootstrap(); }
|
| 123 |
+
})();
|
| 124 |
+
</script>
|
| 125 |
+
|
| 126 |
+
|