Spaces:
Runtime error
Runtime error
include enrichment
Browse files
- Dockerfile +1 -1
- prepare.py +17 -1
Dockerfile
CHANGED
@@ -10,7 +10,7 @@ RUN pip install pip -U
|
|
10 |
|
11 |
RUN pip install renumics-spotlight==1.3.0rc8 pyarrow
|
12 |
|
13 |
-
RUN pip install datasets
|
14 |
|
15 |
COPY . .
|
16 |
RUN mkdir -p /code/.cache
|
|
|
10 |
|
11 |
RUN pip install renumics-spotlight==1.3.0rc8 pyarrow
|
12 |
|
13 |
+
RUN pip install datasets cleanvision
|
14 |
|
15 |
COPY . .
|
16 |
RUN mkdir -p /code/.cache
|
prepare.py
CHANGED
@@ -2,7 +2,18 @@ import pickle
|
|
2 |
import datasets
|
3 |
import os
|
4 |
import pandas as pd
|
|
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
if __name__ == "__main__":
|
7 |
cache_file = "dataset_cache.parquet"
|
8 |
if os.path.exists(cache_file):
|
@@ -14,7 +25,12 @@ if __name__ == "__main__":
|
|
14 |
dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
|
15 |
print("Dataset loaded using datasets.load_dataset().")
|
16 |
|
17 |
-
df = dataset.to_pandas()
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
# Save dataset to cache
|
|
|
2 |
import datasets
|
3 |
import os
|
4 |
import pandas as pd
|
5 |
+
from cleanvision.imagelab import Imagelab
|
6 |
|
7 |
+
def cv_issues_cleanvision(df, image_name='image'):
    """Run CleanVision issue detection on the images listed in a DataFrame column.

    Args:
        df: pandas DataFrame containing a column of image file paths
            (the values are handed to ``Imagelab(filepaths=...)`` as-is).
        image_name: Name of the column in ``df`` that holds the image
            paths. Defaults to ``'image'``.

    Returns:
        The ``Imagelab.issues`` table after ``find_issues()`` has run, with
        its index moved into a regular column by ``reset_index()``.
        NOTE(review): the index is presumably the image file path -- confirm
        against the CleanVision documentation.
    """
    # Honor the caller-supplied column name; the previous version ignored
    # ``image_name`` and always read the hard-coded 'image' column.
    image_paths = df[image_name].to_list()

    imagelab = Imagelab(filepaths=image_paths)
    imagelab.find_issues()

    # Reset the index so the result carries a plain 0..n-1 RangeIndex.
    df_cv = imagelab.issues.reset_index()

    return df_cv
|
16 |
+
|
17 |
if __name__ == "__main__":
|
18 |
cache_file = "dataset_cache.parquet"
|
19 |
if os.path.exists(cache_file):
|
|
|
25 |
dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
|
26 |
print("Dataset loaded using datasets.load_dataset().")
|
27 |
|
28 |
+
df = dataset.to_pandas()
|
29 |
+
|
30 |
+
df=df.reset_index(drop=True)
|
31 |
+
|
32 |
+
df_cv=cv_issues_cleanvision(df)
|
33 |
+
df = pd.concat([df, df_cv], axis=1)
|
34 |
|
35 |
|
36 |
# Save dataset to cache
|