File size: 1,088 Bytes
eebf151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18df44d
eebf151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import pickle
import datasets
import os
from cleanvision.imagelab import Imagelab
import pandas as pd




def cv_issues_cleanvision(df, image_name='image'):
    """Run CleanVision image-quality checks on the images referenced in *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a column of image file paths.
    image_name : str, optional
        Name of the column holding the image paths (default ``'image'``).

    Returns
    -------
    pandas.DataFrame
        CleanVision issue table with ``reset_index()`` applied, so it carries
        a plain RangeIndex and can be concatenated column-wise with *df*.
    """
    # Bug fix: honor the image_name parameter instead of hard-coding 'image'.
    image_paths = df[image_name].to_list()

    imagelab = Imagelab(filepaths=image_paths)
    imagelab.find_issues()

    # reset_index() flattens the filepath index into a column and restores a
    # 0..n-1 RangeIndex aligned with the caller's frame.
    return imagelab.issues.reset_index()
    

if __name__ == "__main__":
    cache_file = "dataset_cache.parquet"

    if not os.path.exists(cache_file):
        # First run: fetch the enriched CIFAR-100 test split from the Hub.
        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
        print("Dataset loaded using datasets.load_dataset().")

        df = dataset.to_pandas().reset_index(drop=True)

        # Attach the CleanVision issue columns alongside the original columns.
        df = pd.concat([df, cv_issues_cleanvision(df)], axis=1)

        # Persist the combined frame so later runs skip the download and scan.
        df.to_parquet(cache_file)
        print("Dataset saved to cache.")
    else:
        # Cached frame already includes the CleanVision columns.
        df = pd.read_parquet(cache_file)
        print("Dataset loaded from cache.")