import ibis
from ibis import _
import pydeck
import pydeck as pdk


def _counts_csv(zoom):
    """Return the CSV file name holding per-cell counts for H3 column ``h3z<zoom>``."""
    return f"gbif-vert-gb-h3z{zoom}.csv"


# +
def connect_data():
    """Open a DuckDB connection through ibis and register the ``gb`` view.

    The setup SQL installs/loads ``httpfs``, loads the locally built H3
    extension, points S3 access at the MinIO endpoint and creates a view
    ``gb`` over the parquet files under ``s3://shared-data/gbif_gb/``.

    Returns:
        The ibis DuckDB connection.
    """
    # Adjacent literals are concatenated by the parser, so the statement text
    # sent to DuckDB is a single string.
    setup_sql = (
        " INSTALL httpfs;"
        " LOAD httpfs;"
        " LOAD 'build/release/extension/h3ext/h3ext.duckdb_extension';"
        " SET s3_url_style='path';"
        " SET s3_endpoint='minio.carlboettiger.info';"
        " CREATE VIEW gb AS SELECT * FROM read_parquet('s3://shared-data/gbif_gb/**');"
        " "
    )
    connection = ibis.duckdb.connect()
    connection.raw_sql(setup_sql)
    return connection


# Reference SQL kept from the original script; it is NOT executed here.
# It adds H3 cell columns (resolutions 1-7) to a GBIF table and writes the
# result as parquet partitioned by h3z1.
# NOTE(review): it filters on countrycode = 'US' and writes to .../gbif/US,
# while connect_data() reads .../gbif_gb -- presumably an analogous run was
# used for the GB data; confirm.
#
# CREATE VIEW gbif AS SELECT * FROM read_parquet('s3://gbif/*');
# INSTALL httpfs;
# LOAD httpfs;
# SET s3_url_style='path';
# SET s3_endpoint='minio.carlboettiger.info';
# SET temp_directory='/tmp/duckdb';
# SET memory_limit = '150GB';
# SET max_memory = '150GB';
# COPY (
#   SELECT *,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 1)) as h3z1,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 2)) as h3z2,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 3)) as h3z3,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 4)) as h3z4,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 5)) as h3z5,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 6)) as h3z6,
#     hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 7)) as h3z7
#   FROM gbif
#   WHERE (NOT((decimallatitude IS NULL))) AND (NOT((decimallongitude IS NULL))) AND (countrycode = 'US')
# ) TO 's3://shared-data/gbif/US' (FORMAT 'parquet', PARTITION_BY h3z1);


def richness_data(con):
    """Cache the distinct chordate taxon/cell combinations to ``gb-cache.parquet``.

    Keeps rows of the ``gb`` table whose phylum is "Chordata", projects them
    onto genus, species, class and the H3 columns ``h3z2`` .. ``h3z7``,
    de-duplicates, and writes the result to a local parquet file.

    Args:
        con: ibis connection exposing a ``gb`` table (see ``connect_data``).

    Returns:
        Whatever ``to_parquet`` returns.
    """
    chordates = con.table("gb").filter(_.phylum == "Chordata")
    taxa_by_cell = chordates.select(
        _.genus,
        _.species,
        _["class"],
        _.h3z2,
        _.h3z3,
        _.h3z4,
        _.h3z5,
        _.h3z6,
        _.h3z7,
    ).distinct()
    return taxa_by_cell.to_parquet("gb-cache.parquet")


# Module-level connection; zoom_data() below reads this global.
con = connect_data()
richness_data(con)

# -

# +
def zoom_data(zoom=6):
    """Write per-cell, per-class row counts for one H3 resolution to CSV.

    Reads ``gb-cache.parquet`` through the module-level ``con``, renames the
    ``h3z<zoom>`` column to ``h3``, counts rows per (h3, class) and writes
    them to ``gbif-vert-gb-h3z<zoom>.csv``.

    NOTE(review): ``n`` counts rows of the cache, which richness_data() makes
    distinct across *all* of h3z2..h3z7 at once. At zoom < 7 one species may
    therefore contribute several rows to the same cell -- confirm whether a
    distinct-species count was intended.

    Args:
        zoom: H3 resolution suffix selecting the ``h3z<zoom>`` column.

    Returns:
        Whatever ``to_csv`` returns.
    """
    cell_column = f"h3z{zoom}"
    cached = con.read_parquet("gb-cache.parquet")
    per_cell_class = (
        cached.rename(h3=cell_column)
        .group_by([_.h3, _["class"]])
        .aggregate(n=_.count())
    )
    return per_cell_class.to_csv(_counts_csv(zoom))


def filterdata(df, year):
    """Return the rows of ``df`` whose ``year`` attribute/column equals ``year``."""
    matches_year = df.year == year
    return df[matches_year]


zoom_data(4)
zoom_data(5)
zoom_data(6)
zoom_data(7)

# +
def load_data(zoom=7):
    """Load counts for one resolution, summed over classes, as a pandas frame.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, sums
    ``n`` per ``h3`` cell and adds ``color = 255 * n / max(n)``.

    Args:
        zoom: H3 resolution suffix selecting which CSV to read.

    Returns:
        pandas DataFrame produced by ``to_pandas()`` with the ``h3``, ``n``
        and ``color`` columns.
    """
    backend = ibis.duckdb.connect()
    counts = backend.read_csv(_counts_csv(zoom))
    totals = counts.group_by(_.h3).aggregate(n=_.n.sum())
    scaled = totals.mutate(color=255 * _.n / _.n.max())
    return scaled.to_pandas()


def load_class(taxa="Amphibia", zoom=7):
    """Load counts for a single class at one resolution as a pandas frame.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, keeps
    the rows whose ``class`` equals ``taxa`` and adds
    ``color = 255 * n / max(n)``, with the maximum taken after filtering.

    Args:
        taxa: value of the ``class`` column to keep.
        zoom: H3 resolution suffix selecting which CSV to read.

    Returns:
        pandas DataFrame produced by ``to_pandas()``.
    """
    backend = ibis.duckdb.connect()
    counts = backend.read_csv(_counts_csv(zoom))
    one_class = counts.filter(_["class"] == taxa)
    scaled = one_class.mutate(color=255 * _.n / _.n.max())
    return scaled.to_pandas()


df = load_data()
df  # bare expression: displays the frame when run as a notebook cell

# +
# Define a layer to display on a map

# Set the viewport location
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=4,
    min_zoom=1,
    max_zoom=12,
    pitch=40.5,
    bearing=-27.36,
)


def map(data):
    """Render ``data`` as an extruded H3 hexagon layer into ``hex_layer.html``.

    The layer takes the hexagon id from ``h3`` and uses ``color`` both for
    the extrusion height and for the fill colour expression.

    NOTE: the name shadows the ``map`` builtin for the rest of the module; it
    is kept because it is this script's existing public name.

    Args:
        data: records passed straight to ``pdk.Layer``; the accessors below
            reference ``h3`` and ``color``.

    Returns:
        Whatever ``Deck.to_html`` returns.
    """
    hex_layer = pdk.Layer(
        "H3HexagonLayer",
        data,
        pickable=True,
        stroked=True,
        filled=True,
        extruded=True,
        elevation_scale=100,
        get_elevation='color',
        get_hexagon="h3",
        get_fill_color="[color, 30, 255 - color, 160]",
        get_line_color=[255, 255, 255],
        line_width_min_pixels=2,
    )

    # Render
    deck = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)
    return deck.to_html("hex_layer.html")


map(df)