import os
import pickle

import datasets
import pandas as pd
from cleanvision.imagelab import Imagelab


def cv_issues_cleanvision(df: pd.DataFrame, image_name: str = "image") -> pd.DataFrame:
    """Run CleanVision issue detection on the images listed in a DataFrame column.

    Args:
        df: DataFrame whose ``image_name`` column is handed to ``Imagelab`` as
            its ``filepaths`` argument.
        image_name: Name of the column holding the image file paths.

    Returns:
        The ``Imagelab.issues`` table with its index moved into a regular
        column via ``reset_index()``.
    """
    # Honour the caller-supplied column name; the default keeps the previous
    # behaviour of reading the "image" column.
    image_paths = df[image_name].to_list()

    imagelab = Imagelab(filepaths=image_paths)
    imagelab.find_issues()

    return imagelab.issues.reset_index()


if __name__ == "__main__":
    cache_file = "dataset_cache.parquet"

    if os.path.exists(cache_file):
        # Load the already-enriched dataset from the local cache.
        df = pd.read_parquet(cache_file)
        print("Dataset loaded from cache.")
    else:
        # No cache yet: load the dataset using datasets.load_dataset().
        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
        print("Dataset loaded using datasets.load_dataset().")

        df = dataset.to_pandas()
        df = df.reset_index(drop=True)

        # Append the CleanVision issue columns next to the original columns.
        # NOTE(review): the column-wise concat pairs rows by position, which
        # presumes Imagelab reports issues in the same order as the input
        # paths -- confirm.
        df_cv = cv_issues_cleanvision(df)
        df = pd.concat([df, df_cv], axis=1)

        # Save the enriched dataset to the cache as parquet.
        df.to_parquet(cache_file)
        print("Dataset saved to cache.")