Spaces: Running
Commit
·
d098e08
1
Parent(s):
eb884e6
fix common crawl stats
Browse files
- overview.py +3 -3
overview.py
CHANGED
|
@@ -172,7 +172,7 @@ table_div_2 = Div(NotStr(table_html2), style="margin: 40px;")
|
|
| 172 |
dataset_sources = pd.DataFrame(
|
| 173 |
{
|
| 174 |
"Data Source": [
|
| 175 |
-
"
|
| 176 |
"Papers",
|
| 177 |
"Wikipedia",
|
| 178 |
"Freelaw",
|
|
@@ -185,7 +185,7 @@ dataset_sources = pd.DataFrame(
|
|
| 185 |
"StackExchange",
|
| 186 |
],
|
| 187 |
"Raw Data Size": [
|
| 188 |
-
"
|
| 189 |
"712 GB",
|
| 190 |
"210 GB",
|
| 191 |
"23 GB",
|
|
@@ -198,7 +198,7 @@ dataset_sources = pd.DataFrame(
|
|
| 198 |
"45 GB",
|
| 199 |
],
|
| 200 |
"Token Count": [
|
| 201 |
-
"
|
| 202 |
"154.96B",
|
| 203 |
"4.75B",
|
| 204 |
"7.34B",
|
|
|
|
| 172 |
dataset_sources = pd.DataFrame(
|
| 173 |
{
|
| 174 |
"Data Source": [
|
| 175 |
+
"Common Crawl",
|
| 176 |
"Papers",
|
| 177 |
"Wikipedia",
|
| 178 |
"Freelaw",
|
|
|
|
| 185 |
"StackExchange",
|
| 186 |
],
|
| 187 |
"Raw Data Size": [
|
| 188 |
+
"9.2 TB",
|
| 189 |
"712 GB",
|
| 190 |
"210 GB",
|
| 191 |
"23 GB",
|
|
|
|
| 198 |
"45 GB",
|
| 199 |
],
|
| 200 |
"Token Count": [
|
| 201 |
+
"4.83T",
|
| 202 |
"154.96B",
|
| 203 |
"4.75B",
|
| 204 |
"7.34B",
|