lusxvr committed on
Commit
8a84412
1 Parent(s): 9577105
app/src/content/article.mdx CHANGED
@@ -54,8 +54,8 @@ We manually collect **over 180** image-text datasets from the recent literature
 <Wide>
 <Accordion title="FineVision Subsets">
 |Subset Name |Total Images|Total Samples|Total Turns|Total Question Tokens|Total Answer Tokens|Category |Source |
-|--------------------------------------|------------|-------------|-----------|---------------------|-------------------|----------------------|-------------------------------------------------------------------|
-|coco_colors |118,287 |118,287 |118,287 |1,301,157 |6,376,672 |Captioning & Knowledge|[@noauthor_hazal-karakusmscoco-controlnet-canny-less-colors_nodate]|
+|--------------------------------------|------------|-------------|-----------|---------------------|-------------------|----------------------|------- |
+|coco_colors |118,287 |118,287 |118,287 |1,301,157 |6,376,672 |Captioning & Knowledge|[@noauthor_hazal-karakusmscoco-controlnet] |
 |densefusion_1m |1,058,751 |1,058,751 |1,058,751 |10,692,478 |263,718,217 |Captioning & Knowledge|[@li_densefusion-1m_2024] |
 |face_emotion |797 |797 |797 |8,767 |8,066 |Captioning & Knowledge|[@mollahosseini_affectnet_2017] |
 |google_landmarks |299,993 |299,993 |842,127 |6,194,978 |10,202,980 |Captioning & Knowledge|Ours |
@@ -68,7 +68,7 @@ We manually collect **over 180** image-text datasets from the recent literature
 |sharegpt4v(llava) |29,986 |29,986 |29,986 |275,783 |6,175,899 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
 |sharegpt4v(sam) |8,990 |8,990 |8,990 |82,874 |1,668,797 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
 |textcaps |21,906 |21,906 |21,906 |240,966 |355,991 |Captioning & Knowledge|[@vedaldi_textcaps_2020] |
-|chart2text |26,961 |26,961 |30,215 |342,215 |2,670,580 |Chart & Table |[@kantharaj_chart--text_2022] |
+|chart2text |26,961 |26,961 |30,215 |342,215 |2,670,580 |Chart & Table |[@kantharaj_chart-text_2022] |
 |chartqa |18,265 |18,265 |28,287 |625,569 |134,793 |Chart & Table |[@masry_chartqa_2022] |
 |CoSyn_400k_chart |116,814 |116,814 |1,085,882 |17,617,591 |57,641,030 |Chart & Table |[@yang_scaling_2025] |
 |CoSyn_400k_table |46,518 |46,518 |416,519 |6,280,455 |23,335,054 |Chart & Table |[@yang_scaling_2025] |
app/src/content/bibliography.bib CHANGED
@@ -12,7 +12,7 @@
 file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/ZVH2EVDN/Li et al. - 2024 - Densefusion-1m Merging vision experts for comprehensive multimodal perception.pdf:application/pdf},
 }
 
-@online{noauthor_hazal-karakusmscoco-controlnet-canny-less-colors_nodate,
+@online{noauthor_hazal-karakusmscoco-controlnet,
 title = {hazal-karakus/mscoco-controlnet-canny-less-colors · Datasets at Hugging Face},
 url = {https://huggingface.co/datasets/hazal-karakus/mscoco-controlnet-canny-less-colors/viewer},
 abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
@@ -83,7 +83,7 @@
 file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/4VWQHTEL/Sidorov et al. - 2020 - TextCaps A Dataset for Image Captioning with Reading Comprehension.pdf:application/pdf},
 }
 
-@misc{kantharaj_chart--text_2022,
+@misc{kantharaj_chart-text_2022,
 title = {Chart-to-Text: A Large-Scale Benchmark for Chart Summarization},
 url = {http://arxiv.org/abs/2203.06486},
 doi = {10.48550/arXiv.2203.06486},
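The two files change in lockstep: every `[@key]` citation in `article.mdx` must resolve to an entry key in `bibliography.bib`, which is why renaming `noauthor_hazal-karakusmscoco-controlnet-canny-less-colors_nodate` and `kantharaj_chart--text_2022` touches both files. A minimal sketch of a consistency check for this contract (not part of this repo; the file paths come from the diff, and the regexes are assumptions based on the `[@key]` and `@type{key,` syntax visible above):

```python
import re
from pathlib import Path

# Paths as they appear in this commit's diff.
mdx = Path("app/src/content/article.mdx").read_text(encoding="utf-8")
bib = Path("app/src/content/bibliography.bib").read_text(encoding="utf-8")

# Keys cited in the article, e.g. [@kantharaj_chart-text_2022]
cited = set(re.findall(r"\[@([^\]\s]+)\]", mdx))

# Keys defined in the bibliography, e.g. @misc{kantharaj_chart-text_2022,
defined = set(re.findall(r"@\w+\{([^,\s]+)\s*,", bib))

print("cited but undefined:", sorted(cited - defined))
print("defined but uncited:", sorted(defined - cited))
```

Before this commit, the first line of output would have flagged the two stale keys; after it, both sets should reconcile (up to entries like `Ours` that are plain text rather than citations).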