Spaces:
Running
Running
update
Browse files
app/src/content/article.mdx
CHANGED
@@ -54,8 +54,8 @@ We manually collect **over 180** image-text datasets from the recent literature
|
|
54 |
<Wide>
|
55 |
<Accordion title="FineVision Subsets">
|
56 |
|Subset Name |Total Images|Total Samples|Total Turns|Total Question Tokens|Total Answer Tokens|Category |Source |
|
57 |
-
|
58 |
-
|coco_colors |118,287 |118,287 |118,287 |1,301,157 |6,376,672 |Captioning & Knowledge|[@noauthor_hazal-karakusmscoco-controlnet
|
59 |
|densefusion_1m |1,058,751 |1,058,751 |1,058,751 |10,692,478 |263,718,217 |Captioning & Knowledge|[@li_densefusion-1m_2024] |
|
60 |
|face_emotion |797 |797 |797 |8,767 |8,066 |Captioning & Knowledge|[@mollahosseini_affectnet_2017] |
|
61 |
|google_landmarks |299,993 |299,993 |842,127 |6,194,978 |10,202,980 |Captioning & Knowledge|Ours |
|
@@ -68,7 +68,7 @@ We manually collect **over 180** image-text datasets from the recent literature
|
|
68 |
|sharegpt4v(llava) |29,986 |29,986 |29,986 |275,783 |6,175,899 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
|
69 |
|sharegpt4v(sam) |8,990 |8,990 |8,990 |82,874 |1,668,797 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
|
70 |
|textcaps |21,906 |21,906 |21,906 |240,966 |355,991 |Captioning & Knowledge|[@vedaldi_textcaps_2020] |
|
71 |
-
|chart2text |26,961 |26,961 |30,215 |342,215 |2,670,580 |Chart & Table |[@kantharaj_chart
|
72 |
|chartqa |18,265 |18,265 |28,287 |625,569 |134,793 |Chart & Table |[@masry_chartqa_2022] |
|
73 |
|CoSyn_400k_chart |116,814 |116,814 |1,085,882 |17,617,591 |57,641,030 |Chart & Table |[@yang_scaling_2025] |
|
74 |
|CoSyn_400k_table |46,518 |46,518 |416,519 |6,280,455 |23,335,054 |Chart & Table |[@yang_scaling_2025] |
|
|
|
54 |
<Wide>
|
55 |
<Accordion title="FineVision Subsets">
|
56 |
|Subset Name |Total Images|Total Samples|Total Turns|Total Question Tokens|Total Answer Tokens|Category |Source |
|
57 |
+
|--------------------------------------|------------|-------------|-----------|---------------------|-------------------|----------------------|------- |
|
58 |
+
|coco_colors |118,287 |118,287 |118,287 |1,301,157 |6,376,672 |Captioning & Knowledge|[@noauthor_hazal-karakusmscoco-controlnet] |
|
59 |
|densefusion_1m |1,058,751 |1,058,751 |1,058,751 |10,692,478 |263,718,217 |Captioning & Knowledge|[@li_densefusion-1m_2024] |
|
60 |
|face_emotion |797 |797 |797 |8,767 |8,066 |Captioning & Knowledge|[@mollahosseini_affectnet_2017] |
|
61 |
|google_landmarks |299,993 |299,993 |842,127 |6,194,978 |10,202,980 |Captioning & Knowledge|Ours |
|
|
|
68 |
|sharegpt4v(llava) |29,986 |29,986 |29,986 |275,783 |6,175,899 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
|
69 |
|sharegpt4v(sam) |8,990 |8,990 |8,990 |82,874 |1,668,797 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
|
70 |
|textcaps |21,906 |21,906 |21,906 |240,966 |355,991 |Captioning & Knowledge|[@vedaldi_textcaps_2020] |
|
71 |
+
|chart2text |26,961 |26,961 |30,215 |342,215 |2,670,580 |Chart & Table |[@kantharaj_chart-text_2022] |
|
72 |
|chartqa |18,265 |18,265 |28,287 |625,569 |134,793 |Chart & Table |[@masry_chartqa_2022] |
|
73 |
|CoSyn_400k_chart |116,814 |116,814 |1,085,882 |17,617,591 |57,641,030 |Chart & Table |[@yang_scaling_2025] |
|
74 |
|CoSyn_400k_table |46,518 |46,518 |416,519 |6,280,455 |23,335,054 |Chart & Table |[@yang_scaling_2025] |
|
app/src/content/bibliography.bib
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/ZVH2EVDN/Li et al. - 2024 - Densefusion-1m Merging vision experts for comprehensive multimodal perception.pdf:application/pdf},
|
13 |
}
|
14 |
|
15 |
-
@online{noauthor_hazal-karakusmscoco-controlnet
|
16 |
title = {hazal-karakus/mscoco-controlnet-canny-less-colors · Datasets at Hugging Face},
|
17 |
url = {https://huggingface.co/datasets/hazal-karakus/mscoco-controlnet-canny-less-colors/viewer},
|
18 |
abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
|
@@ -83,7 +83,7 @@
|
|
83 |
file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/4VWQHTEL/Sidorov et al. - 2020 - TextCaps A Dataset for Image Captioning with Reading Comprehension.pdf:application/pdf},
|
84 |
}
|
85 |
|
86 |
-
@misc{kantharaj_chart
|
87 |
title = {Chart-to-Text: A Large-Scale Benchmark for Chart Summarization},
|
88 |
url = {http://arxiv.org/abs/2203.06486},
|
89 |
doi = {10.48550/arXiv.2203.06486},
|
|
|
12 |
file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/ZVH2EVDN/Li et al. - 2024 - Densefusion-1m Merging vision experts for comprehensive multimodal perception.pdf:application/pdf},
|
13 |
}
|
14 |
|
15 |
+
@online{noauthor_hazal-karakusmscoco-controlnet,
|
16 |
title = {hazal-karakus/mscoco-controlnet-canny-less-colors · Datasets at Hugging Face},
|
17 |
url = {https://huggingface.co/datasets/hazal-karakus/mscoco-controlnet-canny-less-colors/viewer},
|
18 |
abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
|
|
|
83 |
file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/4VWQHTEL/Sidorov et al. - 2020 - TextCaps A Dataset for Image Captioning with Reading Comprehension.pdf:application/pdf},
|
84 |
}
|
85 |
|
86 |
+
@misc{kantharaj_chart-text_2022,
|
87 |
title = {Chart-to-Text: A Large-Scale Benchmark for Chart Summarization},
|
88 |
url = {http://arxiv.org/abs/2203.06486},
|
89 |
doi = {10.48550/arXiv.2203.06486},
|