gera-richarte committed on
Commit
6d787c4
1 Parent(s): 2f36b65

lets try this first version

Browse files
Files changed (2) hide show
  1. app.py +175 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset, IterableDataset
2
+ from functools import partial
3
+ from pandas import DataFrame
4
+ import tqdm
5
+ import json
6
+
7
+ import numpy as np
8
+ import gradio as gr
9
+
10
# When True, open_dataset() installs a placeholder dataset instead of calling
# load_dataset(), and get_images() fabricates random images.
DEBUG = False

# Per-subset settings read by open_dataset():
#   "shards" - total shard count, used in the parquet file name
#   "config" - configuration name passed to load_dataset (defaults to the key)
#   "path"   - directory prefix of the parquet files (defaults to the key)
sets = dict(
    satellogic=dict(shards=3676),
    sentinel_1=dict(shards=1763),
    neon=dict(config="default", shards=607, path="data"),
)
25
+
26
def open_dataset(dataset, set_name, split, batch_size, shard = -1):
    """Open a streaming dataset and return the first batch of images.

    Rebinds the module-level ``ds`` (the dataset) and ``dsi`` (its iterator),
    which get_images() and skip() consume afterwards.

    Args:
        dataset: Repository id handed to ``load_dataset``.
        set_name: Key into the module-level ``sets`` table.
        split: Split name; also the prefix of the shard file name.
        batch_size: Number of items to fetch for the first batch.
        shard: Index of the single shard to stream. A negative value
            (default -1) streams the whole subset instead of one file.

    Returns:
        A tuple ``(slider_update, images, metadata_frame)`` matching the
        ``[shard, gallery, table]`` outputs wired up in the UI.

    Raises:
        KeyError: If ``set_name`` is not a key of ``sets``.
    """
    global dsi, ds

    # Gradio numeric widgets can deliver floats; the ``:05d`` format below
    # only accepts integers.
    shard = int(shard)

    # Resolve the subset settings up front so ``config`` and ``shards`` are
    # bound on every path (previously ``config`` was unbound for shard == -1).
    set_info = sets[set_name]
    config = set_info.get("config", set_name)
    shards = set_info["shards"]

    if shard < 0:
        data_files = None
    else:
        path = set_info.get("path", set_name)
        # Key the file list by the requested split so that ``split=split``
        # below can find it even when the split is not "train".
        data_files = {split: [f"{path}/{split}-{shard:05d}-of-{shards:05d}.parquet"]}

    if DEBUG:
        # Placeholder object used as a plain attribute bag.
        ds = lambda:None
        ds.n_shards = 1234
        ds.config_name = config
        # A real iterator, so that next(dsi) works like in the normal path.
        dsi = iter(range(100))
    else:
        ds = load_dataset(
            dataset,
            config,
            split=split,
            cache_dir="dataset",
            data_files=data_files,
            streaming=True)

        dsi = iter(ds)

    # Shard indices in the file names run from 0 to shards - 1.
    max_shard = shards - 1
    return (
        gr.update(label=f"Shards (max {max_shard})", value=max(shard, 0), maximum=max_shard),
        *get_images(batch_size)
    )
57
+
58
def get_images(batch_size):
    """Pull up to ``batch_size`` items from the open stream and render them.

    Reads the module-level iterator ``dsi`` and dataset ``ds`` set up by
    open_dataset(). One dataset item can contribute several images.

    Args:
        batch_size: Maximum number of dataset items to consume.

    Returns:
        A tuple ``(images, metadata)``: a list of numpy arrays and a pandas
        DataFrame with one row per consumed item. Both are shorter than
        ``batch_size`` if the stream ends early.

    Raises:
        gr.Error: If no dataset has been opened yet (outside DEBUG mode).
    """
    global dsi

    if not DEBUG and "dsi" not in globals():
        # Give the user an actionable message instead of a bare NameError.
        raise gr.Error("No dataset loaded yet, press Load first")

    items = []
    metadatas = []

    # Gradio numeric widgets can deliver floats; range() needs an int.
    for _ in tqdm.trange(int(batch_size), desc="Getting images"):
        if DEBUG:
            # Synthetic image; it must be appended or the gallery stays empty.
            image = np.random.randint(0, 256, (384, 384, 3), dtype=np.uint8)
            items.append(image)
            metadata = {"bounds":[[1,1,4,4]], }
        else:
            try:
                item = next(dsi)
            except StopIteration:
                # Stream exhausted: return what was collected so far.
                break
            metadata = item["metadata"]
            config_name = ds.config_name

            if config_name == "satellogic":
                # NOTE(review): assumes item["rgb"][0] is laid out (C, H, W);
                # the transpose moves axis 0 last - confirm against the data.
                image = np.asarray(item["rgb"][0]).astype(np.uint8)
                items.append(image.transpose(1,2,0))
            elif config_name == "sentinel_1":
                metadata = json.loads(metadata)
                data = np.asarray(item["10m"])
                for frame in data:
                    # Mapping of V and H to RGB. May not be correct
                    # https://gis.stackexchange.com/questions/400726/creating-composite-rgb-images-from-sentinel-1-channels
                    image = np.zeros((3,384,384), "uint8")
                    image[0] = frame[0]
                    image[1] = frame[1]
                    ratio = (image[0]/(image[1]+0.1))*256
                    # The ratio can far exceed 255; clip it before it is
                    # stored into the uint8 channel instead of overflowing.
                    image[2] = np.clip(ratio, 0, 255)
                    items.append(image.transpose(1,2,0))
            elif config_name == "default":
                dataRGB = np.asarray(item["rgb"]).astype("uint8")
                dataCHM = np.asarray(item["chm"]).astype("uint8")
                data1m = np.asarray(item["1m"]).astype("uint8")
                # Each revisit yields three gallery entries: RGB, then the
                # first plane of "chm", then the first plane of "1m".
                for revisit in range(dataRGB.shape[0]):
                    image = dataRGB[revisit,:,:,:]
                    items.append(image.transpose(1,2,0))

                    image = dataCHM[revisit,0,:,:]
                    items.append(image)

                    image = data1m[revisit,0,:,:]
                    items.append(image)
        metadatas.append(metadata)

    return items, DataFrame(metadatas)
104
+
105
def skip(count, batch_size):
    """Discard ``count`` batches from the open stream, then fetch a batch.

    Args:
        count: Number of batches to skip.
        batch_size: Items per batch; also the size of the batch returned.

    Returns:
        Whatever get_images(batch_size) returns after the skip.
    """
    global dsi

    # Gradio numeric widgets can deliver floats; range() needs an int.
    to_skip = int(count*batch_size)

    gr.Info(f"Skipping {to_skip} images (it's slow)")
    for _ in tqdm.trange(to_skip, desc=f"Skipping {to_skip} images"):
        if DEBUG:
            continue
        try:
            next(dsi)
        except StopIteration:
            # Ran off the end of the stream; nothing left to skip.
            break

    return get_images(batch_size)
118
+
119
def update_shape(rows, columns):
    """Return a component update carrying the given ``rows`` and ``columns``."""
    grid = {"rows": rows, "columns": columns}
    return gr.update(**grid)
121
+
122
+
123
# NOTE(review): the nesting below is reconstructed from a diff view that lost
# indentation - confirm which statements belong inside each gr.Row().
with gr.Blocks(title="Dataset Explorer", fill_height = True) as demo:
    # These components are created with render=False because event handlers
    # reference them before the point where they are placed in the layout.
    # precision=0 makes the numeric inputs arrive as ints, which range() and
    # the shard file-name format require.
    batch_size = gr.Number(10, label = "Batch Size", precision=0, render=False)
    shard = gr.Slider(label="Shard", minimum=0, maximum=10000, step=1, render=False)
    table = gr.DataFrame(render = False)
    # headers=["Index","TimeStamp","Bounds","CRS"],

    gallery = gr.Gallery(
        label="satellogic/EarthView",
        interactive=False,
        columns=5, rows=2, render=False)

    with gr.Row():
        dataset = gr.Textbox(label="Dataset", value="satellogic/EarthView")
        config = gr.Dropdown(choices=["satellogic", "sentinel_1", "neon"], label="Subset", value="satellogic", )
        split = gr.Textbox(label="Split", value="train")
        initial_shard = gr.Number(label = "Initial shard", value=0, precision=0)

        gr.Button("Load (minutes)").click(
            open_dataset,
            inputs=[dataset, config, split, batch_size, initial_shard],
            outputs=[shard, gallery, table])

    gallery.render()

    with gr.Row():
        batch_size.render()

        rows = gr.Number(2, label="Rows", precision=0)
        columns = gr.Number(5, label="Columns", precision=0)

        # Either field changing re-applies both values to the gallery grid.
        rows.change(update_shape, [rows, columns], [gallery])
        columns.change(update_shape, [rows, columns], [gallery])

    with gr.Row():
        shard.render()
        # Re-open the dataset on the chosen shard when the slider is released.
        shard.release(
            open_dataset,
            inputs=[dataset, config, split, batch_size, shard],
            outputs=[shard, gallery, table])

        btn = gr.Button("Get More Images", scale=0)
        btn.click(get_images, [batch_size], [gallery, table])

        # Skip buttons are currently disabled:
        # btn = gr.Button("Skip 10 Batches", scale=0)
        # btn.click(partial(skip, 10), [batch_size], [gallery, table])

        # btn = gr.Button("Skip 25 Batches", scale=0)
        # btn.click(partial(skip, 25), [batch_size], [gallery, table])

    table.render()

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
datasets
pandas
gradio
numpy
# app.py imports tqdm directly, so declare it instead of relying on a
# transitive install.
tqdm