import os
import pickle

import pandas as pd
import requests
from renumics import spotlight
from renumics.spotlight.analysis import DataIssue

if __name__ == "__main__":
    cache_file = "dataset_cache.pkl"
    if os.path.exists(cache_file):
        # Load the prepared dataset from the local cache
        with open(cache_file, "rb") as file:
            df = pickle.load(file)
        print("Dataset loaded from cache.")

        # Rows flagged as label issues by cleanlab, sorted by label score
        label_issue_rows = (
            df[df["is_label_issue"]].sort_values("label_score").index.tolist()
        )
        label_issue = DataIssue(
            severity="medium",
            title="label-issue",
            rows=label_issue_rows,
            description="Label issue found by cleanlab - Review and correct if necessary",
        )
        # Rows with an outlier score below 0.6
        outlier_issue_rows = (
            df[df["outlier_score"] < 0.6].sort_values("outlier_score").index.tolist()
        )
        outlier_issue = DataIssue(
            severity="medium",
            title="outlier-issue",
            rows=outlier_issue_rows,
            description="Outlier score < 0.6 - Review and remove or collect more data",
        )
        # Rows flagged as near duplicates by cleanlab
        near_duplicate_issue_rows = (
            df[df["is_near_duplicate_issue"]]
            .sort_values("near_duplicate_score")
            .index.tolist()
        )
        near_duplicate_issue = DataIssue(
            severity="medium",
            title="near-duplicate-issue",
            rows=near_duplicate_issue_rows,
            description="Near duplicate issue found by cleanlab - Review and remove if necessary",
        )
        # Re-launch the Spotlight viewer in a loop so the app keeps serving it
        while True:
            dtypes = {
                "image": spotlight.Image,
                "image_full": spotlight.Image,
                "embedding": spotlight.Embedding,
                "embedding_reduced": spotlight.Embedding,
                "probabilities": spotlight.Embedding,
            }
            view = spotlight.show(
                df.rename(
                    columns={
                        "fine_label_str": "label",
                        "fine_label_prediction_str": "pred",
                    }
                ),
                dtype=dtypes,
                issues=[label_issue, outlier_issue, near_duplicate_issue],
                layout="layout.json",
                port=7860,
                host="0.0.0.0",
                allow_filebrowsing=False,
            )
            view.close()
    else:
        print(f"Dataset {cache_file} not found. Please run prepare.py first.")