cboettig commited on
Commit
4f08a04
·
1 Parent(s): 91489eb
Files changed (4) hide show
  1. .gitignore +10 -0
  2. gbif-app.py +167 -0
  3. gbif.py +162 -0
  4. get_gbif.py +42 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ .Rproj.user
2
+ .Rhistory
3
+ .RData
4
+ .Ruserdata
5
+ .ipynb_checkpoints
6
+ *.Rproj
7
+ *.vrt
8
+ *.tif
9
+ *.sql
10
+ __pycache__
gbif-app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ """An example of showing geographic data."""
15
+
16
+ import os
17
+
18
+ # +
19
+ import altair as alt
20
+ import numpy as np
21
+ import pandas as pd
22
+ import pydeck as pdk
23
+ import streamlit as st
24
+
25
+ import ibis
26
+ from ibis import _
27
+ # -
28
+
29
+ # SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
30
+ st.set_page_config(layout="wide", page_title="GBIF Biodiversity Demo", page_icon=":butterfly:")
31
+
32
+
33
+
34
# LOAD DATA ONCE
@st.cache_resource
def load_data():
    """Return a lazy ibis table of GB butterfly occurrences since 2010.

    Prefers a local parquet cache when present; otherwise reads the full
    GBIF open-data snapshot from S3 and narrows it to Lepidoptera records
    in Great Britain with usable coordinates and years.
    """
    con = ibis.duckdb.connect()
    cache = "butterflies_gb.parquet"
    if os.path.isfile(cache):
        return con.read_parquet(cache)
    occurrences = con.read_parquet(
        "s3://anonymous@gbif-open-data-us-east-1/occurrence/2023-06-01/occurrence.parquet"
    )
    subset = occurrences.dropna(
        ["decimallongitude", "decimallatitude", "year"], how="any"
    )
    subset = subset.filter([_.order == "Lepidoptera", _.countrycode == "GB"])
    subset = subset.filter(_.year >= 2010)
    return subset.select(["year", "decimallongitude", "decimallatitude"])
49
+
50
+
51
def map(data, lat, lon, zoom):
    """Render a pydeck hexagon-bin map of occurrence points in Streamlit.

    data: pandas frame with decimallongitude/decimallatitude columns.
    lat, lon, zoom: initial camera position.
    """
    view = {
        "latitude": lat,
        "longitude": lon,
        "zoom": zoom,
        "pitch": 50,
    }
    hex_layer = pdk.Layer(
        "HexagonLayer",
        data=data,
        get_position=["decimallongitude", "decimallatitude"],
        radius=1000,
        elevation_scale=100,
        pickable=True,
        extruded=True,
    )
    deck = pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state=view,
        layers=[hex_layer],
    )
    st.write(deck)
74
+
75
+
76
# +
# FILTER DATA FOR A SPECIFIC YEAR. ibis lazytable not cache-able..

# @st.cache_data
def filterdata(df, year):
    """Execute the lazy ibis query restricted to one year; returns pandas.

    NOTE(review): caching is disabled because an ibis lazy table is not
    hashable by st.cache_data -- each call re-runs the DuckDB query.
    """
    return df.filter(_.year == year).to_pandas()


# -

# CALCULATE MIDPOINT FOR GIVEN SET OF DATA
@st.cache_data
def mpoint(lat, lon):
    """Return (mean latitude, mean longitude) for coordinate arrays."""
    return (np.average(lat), np.average(lon))
90
+
91
+
92
+
93
# STREAMLIT APP LAYOUT
data = load_data()

# LAYING OUT THE TOP SECTION OF THE APP
row1_1, row1_2 = st.columns((2, 3))

# SEE IF THERE'S A QUERY PARAM IN THE URL (e.g. ?year=2015)
# THIS ALLOWS YOU TO PASS A STATEFUL URL TO SOMEONE WITH A SPECIFIC YEAR SELECTED
if not st.session_state.get("url_synced", False):
    try:
        # st.query_params values are plain strings, not lists, so the
        # previous `[0]` indexing grabbed only the first character
        # (e.g. "2" from "2015"). Parse the whole value instead.
        year = int(st.query_params["year"])
        st.session_state["year"] = year
        st.session_state["url_synced"] = True
    except (KeyError, ValueError):
        # Missing or non-integer ?year= param: fall back to the slider
        # default. ValueError must be caught too, since int() raises it
        # for malformed values.
        pass
109
+
110
+
111
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
def update_query_params():
    """Mirror the slider's session-state value into the ?year= URL param."""
    st.query_params["year"] = st.session_state["year"]
115
+
116
+
117
with row1_1:
    # NOTE(review): "Occurances" is a typo for "Occurrences", but it is a
    # user-facing runtime string, so it is left byte-identical here.
    st.title("GBIF Butterfly Occurances")
    # Keyed to session_state["year"] so the URL-sync block above and the
    # on_change callback share a single source of truth.
    year_selected = st.slider(
        "Select year", 2010, 2023, key="year", on_change=update_query_params
    )


with row1_2:
    st.write(
        """
    ##
    By sliding the slider on the left you can view different slices of time and explore different trends.
    """
    )
131
+
132
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))

# +
# Fixed camera centre over Great Britain; a data-driven alternative would
# be mpoint(data["lat"], data["lon"]).
midpoint = (52.0, -1.0)
# -


# One map per column, at decreasing zoom levels. Each panel runs its own
# filterdata() query, exactly as the original unrolled code did.
panels = [
    (row2_1, "**Large Map**", 4),
    (row2_2, "**Panel 1**", 3),
    (row2_3, "**Panel 2**", 2),
    (row2_4, "**Panel 3**", 1),
]
for column, label, zoom_level in panels:
    with column:
        st.write(label)
        map(filterdata(data, year_selected), midpoint[0], midpoint[1], zoom_level)
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
gbif.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ibis
2
+ from ibis import _
3
+ import pydeck
4
+
5
+
6
# +
def connect_data():
    """Open an in-memory DuckDB connection configured for remote parquet.

    Installs/loads httpfs for S3 access, loads a locally-built H3
    extension (NOTE(review): hard-coded relative path -- assumes the
    process runs from a directory containing that duckdb build tree),
    points S3 at a self-hosted MinIO endpoint, and exposes the GB
    occurrence partition as a view named ``gb``.

    Returns the ibis DuckDB connection.
    """
    con = ibis.duckdb.connect()
    con.raw_sql('''
    INSTALL httpfs;
    LOAD httpfs;
    LOAD 'build/release/extension/h3ext/h3ext.duckdb_extension';
    SET s3_url_style='path';
    SET s3_endpoint='minio.carlboettiger.info';
    CREATE VIEW gb AS SELECT * FROM read_parquet('s3://shared-data/gbif_gb/**');
    ''')
    return con
18
+
19
+
20
+ '''
21
+ CREATE VIEW gbif AS SELECT * FROM read_parquet('s3://gbif/*');
22
+
23
+
24
+ INSTALL httpfs;
25
+ LOAD httpfs;
26
+ SET s3_url_style='path';
27
+ SET s3_endpoint='minio.carlboettiger.info';
28
+
29
+
30
+
31
+ SET temp_directory='/tmp/duckdb';
32
+
33
+
34
+ SET memory_limit = '150GB';
35
+ SET max_memory = '150GB';
36
+ COPY
37
+ (
38
+ SELECT *,
39
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 1)) as h3z1,
40
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 2)) as h3z2,
41
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 3)) as h3z3,
42
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 4)) as h3z4,
43
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 5)) as h3z5,
44
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 6)) as h3z6,
45
+ hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 7)) as h3z7
46
+ FROM gbif
47
+ WHERE (NOT((decimallatitude IS NULL))) AND (NOT((decimallongitude IS NULL))) AND (countrycode = 'US')
48
+ ) TO 's3://shared-data/gbif/US' (FORMAT 'parquet', PARTITION_BY h3z1);
49
+
50
+ '''
51
+
52
+
53
+
54
# distinct species observations at h7 resolution
def richness_data(con):
    """Materialize distinct vertebrate observations to gb-cache.parquet.

    Deduplicates (genus, species, class, h3z2..h3z7) rows for phylum
    Chordata from the ``gb`` view. The chain ends in to_parquet(), whose
    return value is what ``data`` holds -- callers use this function for
    its file side effect, not its return value.
    """
    data = (
        con.table("gb").
        filter(_.phylum == "Chordata").
        select(_.genus, _.species, _["class"], _.h3z2, _.h3z3, _.h3z4, _.h3z5, _.h3z6, _.h3z7).
        distinct().
        to_parquet("gb-cache.parquet")
    )
    return data

# Module-level side effects: build the cache file on import/run.
con = connect_data()
richness_data(con)
# -
68
+
69
+
70
+
71
# +


def zoom_data(zoom=6):
    """Aggregate gb-cache.parquet into per-cell counts and write a CSV.

    zoom: H3 resolution; selects the ``h3z<zoom>`` column of the cache
    (4..7 are used below) and writes gbif-vert-gb-h3z<zoom>.csv with one
    count ``n`` per (cell, class) pair.

    NOTE(review): relies on the module-level ``con`` created above rather
    than taking the connection as a parameter; the chain ends in to_csv(),
    so the return value is a side effect of that call, not a table.
    """
    hzoom = "h3z" + str(zoom)
    data = (
        con.read_parquet("gb-cache.parquet").
        rename(h3 = hzoom).
        group_by([_.h3, _["class"]]).
        aggregate(n = _.count()).
        to_csv("gbif-vert-gb-" + hzoom + ".csv")
    )
    return data
84
+
85
+
86
def filterdata(df, year):
    """Return the rows of *df* whose ``year`` column equals *year*."""
    matches = df.year == year
    return df[matches]
88
+
89
+ zoom_data(4)
90
+ zoom_data(5)
91
+ zoom_data(6)
92
+ zoom_data(7)
93
+
94
+
95
# +
def load_data(zoom=7):
    """Load total counts per H3 cell at the given zoom as a pandas frame.

    Reads the pre-aggregated CSV written by zoom_data(), sums counts
    across classes for each cell, and adds a ``color`` column scaled to
    0-255 by the maximum count.
    """
    con = ibis.duckdb.connect()
    path = "gbif-vert-gb-h3z" + str(zoom) + ".csv"
    cells = con.read_csv(path)
    totals = cells.group_by(_.h3).aggregate(n=_.n.sum())
    shaded = totals.mutate(color=255 * _.n / _.n.max())
    return shaded.to_pandas()
108
+
109
def load_class(taxa="Amphibia", zoom=7):
    """Load per-cell counts for one taxonomic class as a pandas frame.

    Filters the zoom-level CSV to rows whose ``class`` equals *taxa* and
    adds a 0-255 ``color`` column scaled by that subset's maximum count.
    """
    con = ibis.duckdb.connect()
    path = "gbif-vert-gb-h3z" + str(zoom) + ".csv"
    rows = con.read_csv(path)
    rows = rows.filter(_['class'] == taxa)
    rows = rows.mutate(color=255 * _.n / _.n.max())
    return rows.to_pandas()
119
+
120
# Module-level: load the all-classes counts (displayed below via map(df)).
df = load_data()
df

# +
# Define a layer to display on a map

import pydeck as pdk

# Set the viewport location (camera centred over Great Britain)
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=4,
    min_zoom=1,
    max_zoom=12,
    pitch=40.5,
    bearing=-27.36)
137
+
138
def map(data):
    """Write an H3 hexagon map of *data* to hex_layer.html and return it.

    NOTE(review): shadows the ``map`` builtin. Expects columns ``h3``
    (hex cell id) and ``color`` (0-255 scale driving both extrusion
    height and fill colour). Uses the module-level ``view_state``.
    """
    layer = pdk.Layer(
        "H3HexagonLayer",
        data,
        pickable=True,
        stroked=True,
        filled=True,
        extruded=True,
        elevation_scale=100,
        get_elevation='color',
        get_hexagon="h3",
        get_fill_color="[color, 30, 255 - color, 160]",
        get_line_color=[255, 255, 255],
        line_width_min_pixels=2,
    )

    # Render
    r = pdk.Deck(layers=[layer], initial_view_state=view_state)
    return r.to_html("hex_layer.html")


# Module-level side effect: render the all-classes frame on import/run.
map(df)
get_gbif.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import ibis
from ibis import _
import os


# +

import pydeck as pdk

# Set the viewport location (camera over Great Britain)
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=5,
    min_zoom=1,
    max_zoom=12,
    pitch=40.5,
    bearing=-27.36)
20
+
21
def map(data):
    """Render *data* as an H3 hexagon map written to hex_layer.html.

    data: anything pydeck accepts as a layer source (here a CSV URL);
    rows need an ``h3z7`` hex-id column and a ``color`` column driving
    both extrusion height and fill. NOTE(review): shadows the ``map``
    builtin; uses the module-level ``view_state``.
    """
    layer = pdk.Layer(
        "H3HexagonLayer",
        data,
        pickable=True,
        stroked=True,
        filled=True,
        extruded=True,
        elevation_range=[0,200],
        elevation_scale=2000,
        get_elevation='color',
        get_hexagon="h3z7",
        get_fill_color="[color, 30, color]",
        get_line_color=[255, 255, 255],
        line_width_min_pixels=2,
    )

    # Render
    r = pdk.Deck(layers=[layer], initial_view_state=view_state)
    return r.to_html("hex_layer.html")

# Module-level side effect: render a hosted test CSV on import/run.
map("https://huggingface.co/spaces/cboettig/hf-streamlit-demo/resolve/main/test.csv")