cassiebuhler committed on
Commit
7289e5d
·
1 Parent(s): 1488221

script for rounding data

Browse files
Files changed (1) hide show
  1. preprocess_part2.ipynb +152 -0
preprocess_part2.ipynb ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "6e29e27b-6baa-40ac-bbf8-230da2f94d17",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Rounding data\n",
9
+ "\n",
10
+ "This step should have been part of the original preprocessing script, but the decision to round the data was made much later, so it is included here as an add-on. The numeric columns listed below are rounded to 3 decimal places."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "81f3a57e-fa63-43b4-90f9-407f03abae79",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import ibis\n",
21
+ "from ibis import _\n",
22
+ "import ibis.selectors as s\n",
23
+ "parquet = \"https://huggingface.co/spaces/boettiger-lab/pad-us/resolve/575a4505f3eb1703070977d9d26f6a770045309c/pad-stats.parquet\"\n",
24
+ "con = ibis.duckdb.connect(extensions=[\"spatial\"])"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "b4745567-45db-406c-9b08-d72a97908d04",
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "# Reproject the geometry from Mollweide to EPSG:4326, then round the listed columns to 3 decimals.\n",
35
+ "rounded_columns = (\n",
36
+ "    \"richness\",\n",
37
+ "    \"rsr\",\n",
38
+ "    \"all_species_rwr\",\n",
39
+ "    \"all_species_richness\",\n",
40
+ "    \"manageable_carbon\",\n",
41
+ "    \"irrecoverable_carbon\",\n",
42
+ "    \"human_impact\",\n",
43
+ "    \"deforest_carbon\",\n",
44
+ "    \"biodiversity_intactness_loss\",\n",
45
+ "    \"forest_integrity_loss\",\n",
46
+ "    \"crop_reduction\",\n",
47
+ "    \"crop_expansion\",\n",
48
+ ")\n",
49
+ "mollweide = \"+proj=moll +lon_0=0 +datum=WGS84 +units=m +no_defs\"\n",
50
+ "pad_raw = con.read_parquet(parquet).cast({\"geometry\": \"geometry\"})\n",
51
+ "pad_wgs84 = pad_raw.mutate(geometry=_.geometry.convert(mollweide, \"epsg:4326\"))\n",
52
+ "# _[name] is the item-access form of _.name; one keyword argument is built per column.\n",
53
+ "us = pad_wgs84.mutate(\n",
54
+ "    **{name: _[name].round(3) for name in rounded_columns}\n",
55
+ ")"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "markdown",
60
+ "id": "5985e893-ed21-487c-a609-e449edae9012",
61
+ "metadata": {},
62
+ "source": [
63
+ "# Save as PMTiles + Upload data"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "id": "100db9ae-e167-45ed-8c44-6205e5630923",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "import subprocess\n",
74
+ "import os\n",
75
+ "from huggingface_hub import HfApi, login\n",
76
+ "import streamlit as st\n",
77
+ "\n",
78
+ "login(st.secrets[\"HF_TOKEN\"])\n",
79
+ "api = HfApi()\n",
80
+ "\n",
81
+ "def hf_upload(file, repo_id, repo_type):\n",
82
+ "    \"\"\"Upload a local file to a Hugging Face Hub repository.\n",
83
+ "\n",
84
+ "    `file` is used both as the local path and as the path inside `repo_id`.\n",
85
+ "    Returns the value of `api.upload_file`, which was previously discarded.\n",
86
+ "    \"\"\"\n",
87
+ "    return api.upload_file(path_or_fileobj=file, path_in_repo=file, repo_id=repo_id, repo_type=repo_type)\n",
88
+ "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
89
+ "    \"\"\"Convert the GeoJSON `input_file` into the PMTiles `output_file` with Tippecanoe.\n",
90
+ "\n",
91
+ "    `max_zoom` is kept for backward compatibility but is unused: `-zg` asks Tippecanoe to guess the maximum zoom.\n",
92
+ "    Raises RuntimeError if Tippecanoe is not on PATH or exits with a non-zero status.\n",
93
+ "    \"\"\"\n",
94
+ "    import shutil  # local import: the cell's import lines are left untouched\n",
95
+ "    if shutil.which(\"tippecanoe\") is None:\n",
96
+ "        raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
97
+ "    command = [\n",
98
+ "        \"tippecanoe\", \"-o\", output_file,\n",
99
+ "        \"-zg\", \"--extend-zooms-if-still-dropping\", \"--force\",\n",
100
+ "        \"--projection\", \"EPSG:4326\",\n",
101
+ "        \"-L\", \"pad-stats:\" + input_file,\n",
102
+ "    ]\n",
103
+ "    try:\n",
104
+ "        subprocess.run(command, check=True)\n",
105
+ "    except subprocess.CalledProcessError as e:\n",
106
+ "        # Fail loudly: printing and returning let callers upload a stale or missing file.\n",
107
+ "        raise RuntimeError(f\"Error running Tippecanoe: {e}\") from e\n",
108
+ "    print(f\"Successfully generated PMTiles file: {output_file}\")"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": null,
114
+ "id": "eaf1a1cf-a5db-462c-a257-c68547b35d4d",
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "# Execute `us` and label the resulting geometries as EPSG:4326.\n",
119
+ "gdf = us.execute().set_crs(\"EPSG:4326\")\n",
120
+ "geojson_path, pmtiles_path, parquet_path = \"pad-stats.geojson\", \"pad-stats.pmtiles\", \"pad-stats.parquet\"\n",
121
+ "gdf.to_file(geojson_path)\n",
122
+ "generate_pmtiles(geojson_path, pmtiles_path)\n",
123
+ "hf_upload(pmtiles_path, \"boettiger-lab/pad-us-3\", \"dataset\")\n",
124
+ "gdf.to_parquet(parquet_path)\n",
125
+ "# The copy in the space is redundant but kept as a local copy for testing.\n",
126
+ "for repo_id, repo_type in ((\"boettiger-lab/pad-us-3\", \"dataset\"), (\"boettiger-lab/pad-us\", \"space\")):\n",
127
+ "    hf_upload(parquet_path, repo_id, repo_type)"
128
+ ]
129
+ }
130
+ ],
131
+ "metadata": {
132
+ "kernelspec": {
133
+ "display_name": "Python 3 (ipykernel)",
134
+ "language": "python",
135
+ "name": "python3"
136
+ },
137
+ "language_info": {
138
+ "codemirror_mode": {
139
+ "name": "ipython",
140
+ "version": 3
141
+ },
142
+ "file_extension": ".py",
143
+ "mimetype": "text/x-python",
144
+ "name": "python",
145
+ "nbconvert_exporter": "python",
146
+ "pygments_lexer": "ipython3",
147
+ "version": "3.12.7"
148
+ }
149
+ },
150
+ "nbformat": 4,
151
+ "nbformat_minor": 5
152
+ }