lusxvr committed on
Commit
8a84412
1 Parent(s): 9577105
app/src/content/article.mdx CHANGED
@@ -54,8 +54,8 @@ We manually collect **over 180** image-text datasets from the recent literature
 <Wide>
 <Accordion title="FineVision Subsets">
 |Subset Name |Total Images|Total Samples|Total Turns|Total Question Tokens|Total Answer Tokens|Category |Source |
-|--------------------------------------|------------|-------------|-----------|---------------------|-------------------|----------------------|-------------------------------------------------------------------|
-|coco_colors |118,287 |118,287 |118,287 |1,301,157 |6,376,672 |Captioning & Knowledge|[@noauthor_hazal-karakusmscoco-controlnet-canny-less-colors_nodate]|
+|--------------------------------------|------------|-------------|-----------|---------------------|-------------------|----------------------|------- |
+|coco_colors |118,287 |118,287 |118,287 |1,301,157 |6,376,672 |Captioning & Knowledge|[@noauthor_hazal-karakusmscoco-controlnet] |
 |densefusion_1m |1,058,751 |1,058,751 |1,058,751 |10,692,478 |263,718,217 |Captioning & Knowledge|[@li_densefusion-1m_2024] |
 |face_emotion |797 |797 |797 |8,767 |8,066 |Captioning & Knowledge|[@mollahosseini_affectnet_2017] |
 |google_landmarks |299,993 |299,993 |842,127 |6,194,978 |10,202,980 |Captioning & Knowledge|Ours |
@@ -68,7 +68,7 @@ We manually collect **over 180** image-text datasets from the recent literature
 |sharegpt4v(llava) |29,986 |29,986 |29,986 |275,783 |6,175,899 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
 |sharegpt4v(sam) |8,990 |8,990 |8,990 |82,874 |1,668,797 |Captioning & Knowledge|[@leonardis_sharegpt4v_2025] |
 |textcaps |21,906 |21,906 |21,906 |240,966 |355,991 |Captioning & Knowledge|[@vedaldi_textcaps_2020] |
-|chart2text |26,961 |26,961 |30,215 |342,215 |2,670,580 |Chart & Table |[@kantharaj_chart--text_2022] |
+|chart2text |26,961 |26,961 |30,215 |342,215 |2,670,580 |Chart & Table |[@kantharaj_chart-text_2022] |
 |chartqa |18,265 |18,265 |28,287 |625,569 |134,793 |Chart & Table |[@masry_chartqa_2022] |
 |CoSyn_400k_chart |116,814 |116,814 |1,085,882 |17,617,591 |57,641,030 |Chart & Table |[@yang_scaling_2025] |
 |CoSyn_400k_table |46,518 |46,518 |416,519 |6,280,455 |23,335,054 |Chart & Table |[@yang_scaling_2025] |
app/src/content/bibliography.bib CHANGED
@@ -12,7 +12,7 @@
 file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/ZVH2EVDN/Li et al. - 2024 - Densefusion-1m Merging vision experts for comprehensive multimodal perception.pdf:application/pdf},
 }
 
-@online{noauthor_hazal-karakusmscoco-controlnet-canny-less-colors_nodate,
+@online{noauthor_hazal-karakusmscoco-controlnet,
 title = {hazal-karakus/mscoco-controlnet-canny-less-colors · Datasets at Hugging Face},
 url = {https://huggingface.co/datasets/hazal-karakus/mscoco-controlnet-canny-less-colors/viewer},
 abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
@@ -83,7 +83,7 @@
 file = {Available Version (via Google Scholar):/Users/luis/Zotero/storage/4VWQHTEL/Sidorov et al. - 2020 - TextCaps A Dataset for Image Captioning with Reading Comprehension.pdf:application/pdf},
 }
 
-@misc{kantharaj_chart--text_2022,
+@misc{kantharaj_chart-text_2022,
 title = {Chart-to-Text: A Large-Scale Benchmark for Chart Summarization},
 url = {http://arxiv.org/abs/2203.06486},
 doi = {10.48550/arXiv.2203.06486},
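The two files change in lockstep: every `[@key]` citation in `article.mdx` must resolve to an entry key in `bibliography.bib`, which is why renaming `noauthor_hazal-karakusmscoco-controlnet-canny-less-colors_nodate` and `kantharaj_chart--text_2022` touches both files. A minimal sketch of a consistency check for this contract (not part of this repo; the file paths come from the diff, and the regexes are assumptions based on the `[@key]` and `@type{key,` syntax visible above):

```python
import re
from pathlib import Path

# Paths as they appear in this commit's diff.
mdx = Path("app/src/content/article.mdx").read_text(encoding="utf-8")
bib = Path("app/src/content/bibliography.bib").read_text(encoding="utf-8")

# Keys cited in the article, e.g. [@kantharaj_chart-text_2022]
cited = set(re.findall(r"\[@([^\]\s]+)\]", mdx))

# Keys defined in the bibliography, e.g. @misc{kantharaj_chart-text_2022,
defined = set(re.findall(r"@\w+\{([^,\s]+)\s*,", bib))

print("cited but undefined:", sorted(cited - defined))
print("defined but uncited:", sorted(defined - cited))
```

Before this commit, the first line of output would have flagged the two stale keys; after it, both sets should reconcile (up to entries like `Ours` that are plain text rather than citations).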