Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	grouping
Browse files- preprocess.py +31 -16
    	
        preprocess.py
    CHANGED
    
    | @@ -5,21 +5,28 @@ import fiona | |
| 5 | 
             
            import geopandas as gpd
         | 
| 6 | 
             
            import rioxarray
         | 
| 7 | 
             
            from shapely.geometry import box
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 8 |  | 
| 9 | 
             
            # +
         | 
| 10 |  | 
| 11 | 
             
            fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
         | 
| 12 | 
             
            parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
         | 
| 13 | 
             
            # gdb = "https://data.source.coop/cboettig/pad-us-3/PADUS3/PAD_US3_0.gdb" # original, all tables
         | 
| 14 | 
            -
             | 
| 15 | 
            -
            con = ibis.duckdb.connect()
         | 
| 16 | 
            -
            con.load_extension("spatial")
         | 
| 17 | 
            -
            threads = -1
         | 
| 18 | 
            -
             | 
| 19 | 
             
            # or read the fgb version, much slower
         | 
| 20 | 
             
            # pad = con.read_geo(fgb)
         | 
| 21 | 
             
            # pad = con.read_parquet(parquet)
         | 
| 22 | 
             
            # Currently ibis doesn't detect that this is GeoParquet.  We need a SQL escape-hatch to cast the geometry
         | 
|  | |
|  | |
| 23 | 
             
            con.raw_sql(f"CREATE OR REPLACE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
         | 
| 24 | 
             
            pad = con.table("pad")
         | 
| 25 | 
             
            # -
         | 
| @@ -30,12 +37,9 @@ pad = con.table("pad") | |
| 30 | 
             
            meta = fiona.open(fgb)
         | 
| 31 | 
             
            crs = meta.crs
         | 
| 32 |  | 
| 33 | 
            -
            # +
         | 
| 34 | 
             
            ## optional getting bounds
         | 
| 35 | 
            -
            cog = "https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif"
         | 
| 36 | 
            -
             | 
| 37 | 
             
            # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
         | 
| 38 | 
            -
            r = rioxarray.open_rasterio( | 
| 39 | 
             
            bounds = box(*r.rio.transform_bounds(crs))
         | 
| 40 |  | 
| 41 | 
             
            # +
         | 
| @@ -89,18 +93,29 @@ pad_grouping = ( | |
| 89 | 
             
                       )
         | 
| 90 | 
             
                .mutate(bucket = case)
         | 
| 91 | 
             
                .select(categorical_columns)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 92 | 
             
            )
         | 
| 93 |  | 
| 94 | 
             
            pad_grouping.to_parquet("pad-groupings.parquet")
         | 
| 95 | 
             
            # -
         | 
| 96 |  | 
| 97 | 
            -
            agency_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-name.parquet").select(manager_name_id = "Code", manager_name = "Dom")
         | 
| 98 | 
            -
            agency_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-type.parquet").select(manager_type_id = "Code", manager_type = "Dom")
         | 
| 99 | 
            -
            desig_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-desgination-type.parquet").select(designation_type_id = "Code", designation_type = "Dom")
         | 
| 100 | 
            -
            public_access = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-public-access.parquet").select(public_access_id = "Code", public_access = "Dom")
         | 
| 101 | 
            -
            state_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-state-name.parquet").select(state = "Code", state_name = "Dom")
         | 
| 102 | 
            -
            iucn = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-iucn.parquet").select(iucn_code = "CODE", iucn_category = "DOM")
         | 
| 103 | 
            -
             | 
| 104 | 
             
            (pad_parquet
         | 
| 105 | 
             
                .rename(manager_name_id = "Mang_Name", 
         | 
| 106 | 
             
                        manager_type_id = "Mang_Type", 
         | 
|  | |
| 5 | 
             
            import geopandas as gpd
         | 
| 6 | 
             
            import rioxarray
         | 
| 7 | 
             
            from shapely.geometry import box
         | 
| 8 | 
            +
            con = ibis.duckdb.connect()
         | 
| 9 | 
            +
            con.load_extension("spatial")
         | 
| 10 | 
            +
            threads = -1
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            agency_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-name.parquet").select(manager_name_id = "Code", manager_name = "Dom")
         | 
| 13 | 
            +
            agency_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-agency-type.parquet").select(manager_type_id = "Code", manager_type = "Dom")
         | 
| 14 | 
            +
            desig_type = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-desgination-type.parquet").select(designation_type_id = "Code", designation_type = "Dom")
         | 
| 15 | 
            +
            public_access = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-public-access.parquet").select(public_access_id = "Code", public_access = "Dom")
         | 
| 16 | 
            +
            state_name = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-state-name.parquet").select(state = "Code", state_name = "Dom")
         | 
| 17 | 
            +
            iucn = con.read_parquet("https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/parquet/pad-iucn.parquet").select(iucn_code = "CODE", iucn_category = "DOM")
         | 
| 18 |  | 
| 19 | 
             
            # +
         | 
| 20 |  | 
| 21 | 
             
            fgb = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.fgb"
         | 
| 22 | 
             
            parquet = "https://data.source.coop/cboettig/pad-us-3/pad-us3-combined.parquet"
         | 
| 23 | 
             
            # gdb = "https://data.source.coop/cboettig/pad-us-3/PADUS3/PAD_US3_0.gdb" # original, all tables
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 24 | 
             
            # or read the fgb version, much slower
         | 
| 25 | 
             
            # pad = con.read_geo(fgb)
         | 
| 26 | 
             
            # pad = con.read_parquet(parquet)
         | 
| 27 | 
             
            # Currently ibis doesn't detect that this is GeoParquet.  We need a SQL escape-hatch to cast the geometry
         | 
| 28 | 
            +
             | 
| 29 | 
            +
             | 
| 30 | 
             
            con.raw_sql(f"CREATE OR REPLACE VIEW pad AS SELECT *, st_geomfromwkb(geometry) as geom from read_parquet('{parquet}')")
         | 
| 31 | 
             
            pad = con.table("pad")
         | 
| 32 | 
             
            # -
         | 
|  | |
| 37 | 
             
            meta = fiona.open(fgb)
         | 
| 38 | 
             
            crs = meta.crs
         | 
| 39 |  | 
|  | |
| 40 | 
             
            ## optional getting bounds
         | 
|  | |
|  | |
| 41 | 
             
            # extract bounds. (in this case these are already in the same projection actually so r.rio.bounds() would work)
         | 
| 42 | 
            +
            r = rioxarray.open_rasterio("https://data.source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif")
         | 
| 43 | 
             
            bounds = box(*r.rio.transform_bounds(crs))
         | 
| 44 |  | 
| 45 | 
             
            # +
         | 
|  | |
| 93 | 
             
                       )
         | 
| 94 | 
             
                .mutate(bucket = case)
         | 
| 95 | 
             
                .select(categorical_columns)
         | 
| 96 | 
            +
                .rename(manager_name_id = "Mang_Name", 
         | 
| 97 | 
            +
                        manager_type_id = "Mang_Type", 
         | 
| 98 | 
            +
                        designation_type_id = "Des_Tp",
         | 
| 99 | 
            +
                        public_access_id = "Pub_Access",
         | 
| 100 | 
            +
                        category = "FeatClass",
         | 
| 101 | 
            +
                        iucn_code = "IUCN_Cat",
         | 
| 102 | 
            +
                        gap_code = "GAP_Sts",
         | 
| 103 | 
            +
                        state = "State_Nm",
         | 
| 104 | 
            +
                        easement_holder = "EsmtHldr",
         | 
| 105 | 
            +
                        date_established = "Date_Est",
         | 
| 106 | 
            +
                        area_name = "Unit_Nm")
         | 
| 107 | 
            +
                .left_join(agency_name, "manager_name_id")
         | 
| 108 | 
            +
                .left_join(agency_type, "manager_type_id")
         | 
| 109 | 
            +
                .left_join(desig_type, "designation_type_id")
         | 
| 110 | 
            +
                .left_join(public_access, "public_access_id")
         | 
| 111 | 
            +
                .left_join(state_name, "state")
         | 
| 112 | 
            +
                .left_join(iucn, "iucn_code")
         | 
| 113 | 
            +
                .select(~s.contains("_right"))
         | 
| 114 | 
             
            )
         | 
| 115 |  | 
| 116 | 
             
            pad_grouping.to_parquet("pad-groupings.parquet")
         | 
| 117 | 
             
            # -
         | 
| 118 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 119 | 
             
            (pad_parquet
         | 
| 120 | 
             
                .rename(manager_name_id = "Mang_Name", 
         | 
| 121 | 
             
                        manager_type_id = "Mang_Type", 
         | 

