|
| 1 | +import warnings |
| 2 | + |
1 | 3 | import geopandas as gpd
|
| 4 | +from pyogrio.errors import DataSourceError |
2 | 5 |
|
3 | 6 | from geospatial_tools import DATA_DIR
|
4 | 7 | from geospatial_tools.planetary_computer.sentinel_2 import BestProductsForFeatures
|
5 | 8 | from geospatial_tools.vector import (
|
6 | 9 | create_vector_grid_parallel,
|
| 10 | + dask_spatial_join, |
7 | 11 | select_polygons_by_location,
|
8 | 12 | to_geopackage,
|
9 | 13 | )
|
10 | 14 |
|
# Suppress all FutureWarnings for the whole script — presumably to quiet noisy
# deprecation chatter from the geo stack (geopandas/dask); confirm which library
# emits them. NOTE(review): this is a global filter and will also hide
# FutureWarnings from our own code — consider scoping it with `module=`.
warnings.filterwarnings("ignore", category=FutureWarning)

###
# Base arguments
###

# Base files
# Input polygon of the USA, already projected to EPSG:5070 (per filename).
USA_POLYGON_FILE = DATA_DIR / "usa_polygon_5070.gpkg"
# Alternate subset grid — presumably kept for quick test runs; confirm before deleting.
# S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070_subset.gpkg"
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"

# Grid size for vector polygons
|
|
# Max cloud cover to use as search parameter
# (presumably a percentage, as in STAC `eo:cloud_cover` — confirm in
# BestProductsForFeatures).
MAX_CLOUD_COVER = 15

# Processing arguments
# NOTE(review): worker counts are hardware-specific — tune to the host's CPU count.
NUM_OF_WORKERS_VECTOR_GRID = 16
NUM_OF_WORKERS_SPATIAL_SELECT = 8

38 | 49 | ###
|
39 | 50 | # Base data
|
40 | 51 | ###
|
|
72 | 83 | # query the [Planetary Computer](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a)
|
73 | 84 | # Sentinel 2 dataset and clip the selected Sentinel 2 images.
|
74 | 85 |
|
# Cache the 800 m vector grid on disk: generating it is expensive, so first try
# to load a previously written GeoPackage and only regenerate on a cache miss.
grid_800m_filename = DATA_DIR / "polygon_grid_800m_2.gpkg"
bbox = usa_polygon.total_bounds
try:
    print(f"Trying to load {grid_800m_filename}, if it exists")
    grid_800m = gpd.read_file(grid_800m_filename)
    print("Succeeded!")
except DataSourceError:
    # Cache miss (file missing or unreadable): build the grid in parallel over
    # the USA bounding box and persist it for the next run.
    print(f"Failed to load {grid_800m_filename}")
    print("Starting processing for [create_vector_grid_parallel]")
    grid_800m = create_vector_grid_parallel(
        bounding_box=bbox, num_of_workers=NUM_OF_WORKERS_VECTOR_GRID, grid_size=GRID_SIZE, crs="EPSG:5070"
    )
    to_geopackage(grid_800m, grid_800m_filename)
    # Fixed: the previous message referenced a nonexistent variable name
    # ("grid_parallel"); report the actual grid size.
    print(f"Number of polygons in generated grid_800m: {len(grid_800m)}")
81 | 100 |
|
# Selecting the useful polygons
#
# big or too complex, it would be better off going through QGIS, PyQGIS, GDAL or
# some other more efficient way to do this operation.

# Cache the USA-clipped grid on disk; regenerate it with a Dask-backed parallel
# spatial join only if the cached GeoPackage cannot be read.
usa_polygon_grid_800m_filename = DATA_DIR / "usa_polygon_grid_800m_dask_3.gpkg"
try:
    print(f"Trying to load {usa_polygon_grid_800m_filename}, if it exists")
    usa_polygon_grid_800m = gpd.read_file(usa_polygon_grid_800m_filename)
    print("Succeeded!")
except DataSourceError:
    # Cache miss: keep only the grid cells that intersect the USA polygon,
    # then persist the selection for the next run.
    print(f"Failed to load {usa_polygon_grid_800m_filename}")
    print("Starting intersect selection")
    usa_polygon_grid_800m = select_polygons_by_location(
        select_features_from=grid_800m,
        intersected_with=usa_polygon,
        num_of_workers=NUM_OF_WORKERS_SPATIAL_SELECT,
        join_function=dask_spatial_join,
    )
    to_geopackage(usa_polygon_grid_800m, usa_polygon_grid_800m_filename)

# Finding the best image for each S2 tiling grid

# Initiating our client
# NOTE(review): s2_tile_grid_list is assigned but never referenced in the rest
# of this file — confirm it is dead code before removing.
s2_tile_grid_list = s2_grid["name"].to_list()
best_products_client = BestProductsForFeatures(
    sentinel2_tiling_grid=s2_grid,
    sentinel2_tiling_grid_column=S2_FEATURE_NAME_COLUMN,
    vector_features=usa_polygon_grid_800m,
    vector_features_column=VECTOR_COLUMN_NAME,
    max_cloud_cover=MAX_CLOUD_COVER,
)

# Executing the search
#
# This search looks only for complete products, meaning products with less than
# 5 percent of nodata.

best_products_client.create_date_ranges(START_YEAR, END_YEAR, START_MONTH, END_MONTH)
products = best_products_client.find_best_complete_products()
# Persist the client's search state/results via its own writer.
best_products_client.to_file()

# Selecting the best products for each vector tile
#
# This step is necessary as some of our vector polygons can be within multiple S2 tiles.
# The best available S2 tile is therefore selected for each vector polygon.

best_results_path = DATA_DIR / "vector_tiles_800m_with_s2tiles_dask_3.gpkg"
best_results = best_products_client.select_best_products_per_feature()

# Writing the results to file
to_geopackage(best_results, best_results_path)
0 commit comments