sps44 committed on
Commit
17f183d
·
1 Parent(s): 619763d

added cv enrichments

Browse files
Files changed (3) hide show
  1. cifar100-enrichment-cv.parquet +3 -0
  2. prepare.py +2 -15
  3. run.py +10 -1
cifar100-enrichment-cv.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc7e67b598b765cce75309f7c414a13ca9fcc8004f436a0490822c899bfa66c
3
+ size 544628
prepare.py CHANGED
@@ -4,15 +4,6 @@ import os
4
  import pandas as pd
5
  from cleanvision.imagelab import Imagelab
6
 
7
- def cv_issues_cleanvision(df, image_name='image'):
8
-
9
- image_paths = df['image'].to_list()
10
- imagelab = Imagelab(filepaths=image_paths)
11
- imagelab.find_issues()
12
-
13
- df_cv=imagelab.issues.reset_index()
14
-
15
- return df_cv
16
 
17
  if __name__ == "__main__":
18
  cache_file = "dataset_cache.parquet"
@@ -25,12 +16,8 @@ if __name__ == "__main__":
25
  dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
26
  print("Dataset loaded using datasets.load_dataset().")
27
 
28
- df = dataset.to_pandas()
29
-
30
- df=df.reset_index(drop=True)
31
-
32
- df_cv=cv_issues_cleanvision(df)
33
- df = pd.concat([df, df_cv], axis=1)
34
 
35
 
36
  # Save dataset to cache
 
4
  import pandas as pd
5
  from cleanvision.imagelab import Imagelab
6
 
 
 
 
 
 
 
 
 
 
7
 
8
  if __name__ == "__main__":
9
  cache_file = "dataset_cache.parquet"
 
16
  dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
17
  print("Dataset loaded using datasets.load_dataset().")
18
 
19
+ df = dataset.to_pandas()
20
+
 
 
 
 
21
 
22
 
23
  # Save dataset to cache
run.py CHANGED
@@ -6,10 +6,15 @@ import pandas as pd
6
 
7
  if __name__ == "__main__":
8
  cache_file = "dataset_cache.parquet"
 
 
 
9
  if os.path.exists(cache_file):
10
  # Load dataset from cache
11
  df = pd.read_parquet(cache_file)
12
 
 
 
13
  print("Dataset loaded from cache.")
14
  else:
15
  # Load dataset using datasets.load_dataset()
@@ -25,7 +30,11 @@ if __name__ == "__main__":
25
 
26
  print("Dataset saved to cache.")
27
 
28
-
 
 
 
 
29
  #df = dataset.to_pandas()
30
  df_show = df.drop(columns=['embedding', 'probabilities'])
31
  while True:
 
6
 
7
  if __name__ == "__main__":
8
  cache_file = "dataset_cache.parquet"
9
+ cache_file_enrichment="cifar100-enrichment-cv.parquet"
10
+ cache_file_issues="sliceline.pkl"
11
+
12
  if os.path.exists(cache_file):
13
  # Load dataset from cache
14
  df = pd.read_parquet(cache_file)
15
 
16
+
17
+
18
  print("Dataset loaded from cache.")
19
  else:
20
  # Load dataset using datasets.load_dataset()
 
30
 
31
  print("Dataset saved to cache.")
32
 
33
+ df_cv=pd.read_parquet(cache_file)
34
+
35
+ with open(issue_cache_file, "rb") as issue_file:
36
+ issues = pickle.load(issue_file)
37
+
38
  #df = dataset.to_pandas()
39
  df_show = df.drop(columns=['embedding', 'probabilities'])
40
  while True: