add AIBOM
#5
by
sabato-nocera
- opened
- neulab_Pangea-7B.json +153 -0
neulab_Pangea-7B.json
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bomFormat": "CycloneDX",
|
3 |
+
"specVersion": "1.6",
|
4 |
+
"serialNumber": "urn:uuid:2e11cdec-907d-4d94-8fb5-403f936d9153",
|
5 |
+
"version": 1,
|
6 |
+
"metadata": {
|
7 |
+
"timestamp": "2025-06-05T09:36:57.264117+00:00",
|
8 |
+
"component": {
|
9 |
+
"type": "machine-learning-model",
|
10 |
+
"bom-ref": "neulab/Pangea-7B-0d33173e-6d50-5129-90ef-bfa7e3206cf4",
|
11 |
+
"name": "neulab/Pangea-7B",
|
12 |
+
"externalReferences": [
|
13 |
+
{
|
14 |
+
"url": "https://huggingface.co/neulab/Pangea-7B",
|
15 |
+
"type": "documentation"
|
16 |
+
}
|
17 |
+
],
|
18 |
+
"modelCard": {
|
19 |
+
"modelParameters": {
|
20 |
+
"architectureFamily": "qwen2",
|
21 |
+
"modelArchitecture": "LlavaQwenForCausalLM",
|
22 |
+
"datasets": [
|
23 |
+
{
|
24 |
+
"ref": "neulab/PangeaInstruct-d4b50450-cdc3-5403-9299-37c49a9cf3b7"
|
25 |
+
}
|
26 |
+
]
|
27 |
+
},
|
28 |
+
"properties": [
|
29 |
+
{
|
30 |
+
"name": "base_model",
|
31 |
+
"value": "Qwen/Qwen2-7B-Instruct"
|
32 |
+
}
|
33 |
+
],
|
34 |
+
"consideration": {
|
35 |
+
"useCases": "Pangea-7B follows the architecture of [LLaVA-NeXT](https://github.com/LLaVA-VL/LLaVA-NeXT). You could either (1) follow the same model loading procedures as [LLaVA-NeXT](https://github.com/LLaVA-VL/LLaVA-NeXT), an example of loading Pangea-7B directly is shown in the Python code below, or (2) use our hf version of Pangea-7B: [Pangea-7B-hf](https://huggingface.co/neulab/Pangea-7B-hf)"
|
36 |
+
}
|
37 |
+
},
|
38 |
+
"authors": [
|
39 |
+
{
|
40 |
+
"name": "neulab"
|
41 |
+
}
|
42 |
+
],
|
43 |
+
"licenses": [
|
44 |
+
{
|
45 |
+
"license": {
|
46 |
+
"id": "Apache-2.0",
|
47 |
+
"url": "https://spdx.org/licenses/Apache-2.0.html"
|
48 |
+
}
|
49 |
+
}
|
50 |
+
],
|
51 |
+
"description": "- **Model:** Pangea is a fully open-source Multilingual Multimodal Multicultural LLM.\n- **Date:** Pangea-7B was trained in 2024.\n- **Training Dataset:** [6M PangeaIns](https://huggingface.co/datasets/neulab/PangeaInstruct).\n- **Architecture:** Pangea-7B follows the architecture of [LLaVA-NeXT](https://github.com/LLaVA-VL/LLaVA-NeXT), with a [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct) backbone.",
|
52 |
+
"tags": [
|
53 |
+
"safetensors",
|
54 |
+
"qwen2",
|
55 |
+
"am",
|
56 |
+
"ar",
|
57 |
+
"bg",
|
58 |
+
"bn",
|
59 |
+
"cs",
|
60 |
+
"de",
|
61 |
+
"el",
|
62 |
+
"en",
|
63 |
+
"es",
|
64 |
+
"fa",
|
65 |
+
"fr",
|
66 |
+
"ga",
|
67 |
+
"hi",
|
68 |
+
"id",
|
69 |
+
"ig",
|
70 |
+
"it",
|
71 |
+
"iw",
|
72 |
+
"ja",
|
73 |
+
"jv",
|
74 |
+
"ko",
|
75 |
+
"nl",
|
76 |
+
"mn",
|
77 |
+
"ms",
|
78 |
+
"no",
|
79 |
+
"pl",
|
80 |
+
"pt",
|
81 |
+
"ro",
|
82 |
+
"ru",
|
83 |
+
"si",
|
84 |
+
"su",
|
85 |
+
"sw",
|
86 |
+
"ta",
|
87 |
+
"te",
|
88 |
+
"th",
|
89 |
+
"tr",
|
90 |
+
"uk",
|
91 |
+
"ur",
|
92 |
+
"vi",
|
93 |
+
"zh",
|
94 |
+
"dataset:neulab/PangeaInstruct",
|
95 |
+
"arxiv:2410.16153",
|
96 |
+
"base_model:Qwen/Qwen2-7B-Instruct",
|
97 |
+
"base_model:finetune:Qwen/Qwen2-7B-Instruct",
|
98 |
+
"license:apache-2.0",
|
99 |
+
"region:us"
|
100 |
+
]
|
101 |
+
}
|
102 |
+
},
|
103 |
+
"components": [
|
104 |
+
{
|
105 |
+
"type": "data",
|
106 |
+
"bom-ref": "neulab/PangeaInstruct-d4b50450-cdc3-5403-9299-37c49a9cf3b7",
|
107 |
+
"name": "neulab/PangeaInstruct",
|
108 |
+
"data": [
|
109 |
+
{
|
110 |
+
"type": "dataset",
|
111 |
+
"bom-ref": "neulab/PangeaInstruct-d4b50450-cdc3-5403-9299-37c49a9cf3b7",
|
112 |
+
"name": "neulab/PangeaInstruct",
|
113 |
+
"contents": {
|
114 |
+
"url": "https://huggingface.co/datasets/neulab/PangeaInstruct",
|
115 |
+
"properties": [
|
116 |
+
{
|
117 |
+
"name": "task_categories",
|
118 |
+
"value": "visual-question-answering, question-answering"
|
119 |
+
},
|
120 |
+
{
|
121 |
+
"name": "language",
|
122 |
+
"value": "am, ar, bg, bn, cs, de, el, en, es, fa, fr, ga, hi, id, ig, it, iw, ja, jv, ko, nl, mn, ms, no, pl, pt, ro, ru, si, su, sw, ta, te, th, tr, uk, ur, vi, zh"
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"name": "size_categories",
|
126 |
+
"value": "1M<n<10M"
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"name": "pretty_name",
|
130 |
+
"value": "PangeaIns"
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"name": "license",
|
134 |
+
"value": "apache-2.0"
|
135 |
+
}
|
136 |
+
]
|
137 |
+
},
|
138 |
+
"governance": {
|
139 |
+
"owners": [
|
140 |
+
{
|
141 |
+
"organization": {
|
142 |
+
"name": "neulab",
|
143 |
+
"url": "https://huggingface.co/neulab"
|
144 |
+
}
|
145 |
+
}
|
146 |
+
]
|
147 |
+
},
|
148 |
+
"description": "\n\t\n\t\t\n\t\tPangeaInstruct\n\t\n\nPangea: A Fully Open Multilingual Multimodal LLM for 39 Languages\n\ud83c\uddea\ud83c\uddf9 \ud83c\uddf8\ud83c\udde6 \ud83c\udde7\ud83c\uddec \ud83c\udde7\ud83c\udde9 \ud83c\udde8\ud83c\uddff \ud83c\udde9\ud83c\uddea \ud83c\uddec\ud83c\uddf7 \ud83c\uddec\ud83c\udde7 \ud83c\uddfa\ud83c\uddf8 \ud83c\uddea\ud83c\uddf8 \ud83c\uddee\ud83c\uddf7 \ud83c\uddeb\ud83c\uddf7 \ud83c\uddee\ud83c\uddea \ud83c\uddee\ud83c\uddf3 \ud83c\uddee\ud83c\udde9 \ud83c\uddf3\ud83c\uddec \ud83c\uddee\ud83c\uddf9 \ud83c\uddee\ud83c\uddf1 \ud83c\uddef\ud83c\uddf5 \ud83c\uddee\ud83c\udde9 \ud83c\uddf0\ud83c\uddf7 \ud83c\uddf3\ud83c\uddf1 \ud83c\uddf2\ud83c\uddf3 \ud83c\uddf2\ud83c\uddfe \ud83c\uddf3\ud83c\uddf4 \ud83c\uddf5\ud83c\uddf1 \ud83c\uddf5\ud83c\uddf9 \ud83c\udde7\ud83c\uddf7 \ud83c\uddf7\ud83c\uddf4 \ud83c\uddf7\ud83c\uddfa \ud83c\uddf1\ud83c\uddf0 \ud83c\uddee\ud83c\udde9 \ud83c\uddf0\ud83c\uddea \ud83c\uddf9\ud83c\uddff \ud83c\uddf1\ud83c\uddf0 \ud83c\uddee\ud83c\uddf3 \ud83c\uddee\ud83c\uddf3 \ud83c\uddf9\ud83c\udded \ud83c\uddf9\ud83c\uddf7 \ud83c\uddfa\ud83c\udde6 \ud83c\uddf5\ud83c\uddf0 \ud83c\uddee\ud83c\uddf3 \ud83c\uddfb\ud83c\uddf3 \ud83c\udde8\ud83c\uddf3 \ud83c\uddf9\ud83c\uddfc\n\ud83c\udfe0 Homepage | \ud83e\udd16 Pangea-7B | \ud83d\udcca PangeaIns | \ud83e\uddea PangeaBench | \ud83d\udcbb Github | \ud83d\udcc4 Arxiv | \ud83d\udcd5 PDF | \ud83d\udda5\ufe0f Demo\n\n\nThis README provides comprehensive details on the PangeaIns dataset, which\u2026 See the full description on the dataset page: https://huggingface.co/datasets/neulab/PangeaInstruct."
|
149 |
+
}
|
150 |
+
]
|
151 |
+
}
|
152 |
+
]
|
153 |
+
}
|