|
| 1 | +import geopandas as gpd |
| 2 | + |
| 3 | +from geospatial_tools import DATA_DIR |
| 4 | +from geospatial_tools.planetary_computer.sentinel_2 import BestProductsForFeatures |
| 5 | +from geospatial_tools.vector import ( |
| 6 | + create_vector_grid_parallel, |
| 7 | + select_polygons_by_location, |
| 8 | + to_geopackage, |
| 9 | +) |
| 10 | + |
###
# Base arguments
###

# Input layers: the contiguous-USA polygon and the Sentinel-2 tiling grid
# clipped to it, both already reprojected to EPSG:5070.
USA_POLYGON_FILE = DATA_DIR / "usa_polygon_5070.gpkg"
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"

# Edge length of the square grid cells, in CRS units (metres for EPSG:5070)
GRID_SIZE = 800
# EPSG code used for every output layer
CRS_PROJECTION = 5070

# Column of the S2 tiling grid that holds the tile ids
S2_FEATURE_NAME_COLUMN = "name"
# Column that will receive, for each vector polygon, the ids of the S2 tiles
# containing it
VECTOR_COLUMN_NAME = "s2_tiles"

# Temporal search window: June and July of every year from 2020 through 2024
START_YEAR = 2020
END_YEAR = 2024
START_MONTH = 6
END_MONTH = 7

# Upper bound (percent) on cloud cover used as a search parameter
MAX_CLOUD_COVER = 15
| 37 | + |
| 38 | +### |
| 39 | +# Base data |
| 40 | +### |
| 41 | + |
# The USA polygon is based on 2018's `cb_2018_us_nation_5m` shapefile, taken from here:
| 43 | +# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html |
| 44 | +# |
| 45 | +# It was then processed using QGIS to keep only the contiguous states, without any islands. |
| 46 | +# |
| 47 | +# The Sentinel 2 grid was taken from the kml file found here: |
| 48 | +# https://sentiwiki.copernicus.eu/web/s2-products |
| 49 | + |
| 50 | +### |
| 51 | +# Initial pre-processing |
| 52 | +### |
| 53 | + |
| 54 | +# The layers below were processed using QGIS. |
| 55 | +# |
| 56 | +# For the purpose of this analysis, only the contiguous lower 48 states have been |
# kept; smaller islands/land masses have also been stripped.
| 58 | +# |
| 59 | +# The S2 tiling grid has been trimmed to keep only the grid cells that overlap with the |
| 60 | +# contiguous states. |
| 61 | +# |
| 62 | +# Since our area of study is quite large, the `EPSG:5070` projection was chosen, as it |
| 63 | +# covers the whole area, introduces minimal distortion while preserving area. |
| 64 | +# |
| 65 | + |
# Load the pre-processed base layers (see the provenance notes above).
usa_polygon = gpd.read_file(USA_POLYGON_FILE)
s2_grid = gpd.read_file(S2_USA_GRID_FILE)

# Creating our grid
#
# From this, we want to create a grid of square polygons with which we will later on
# query the [Planetary Computer](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a)
# Sentinel 2 dataset and clip the selected Sentinel 2 images.


bbox = usa_polygon.total_bounds
# NOTE(review): `grid_800m_filename` is defined but the full grid is never
# written to it; either save `grid_800m` here or drop the variable — TODO confirm.
grid_800m_filename = DATA_DIR / "polygon_grid_800m.gpkg"
print("Starting processing for [create_vector_grid_parallel]")
# Build the CRS string from CRS_PROJECTION instead of hard-coding "EPSG:5070",
# so the output projection is configured in a single place.
grid_800m = create_vector_grid_parallel(bounding_box=bbox, grid_size=GRID_SIZE, crs=f"EPSG:{CRS_PROJECTION}")
print(f"Number of polygons in generated grid: {len(grid_800m)}")
| 81 | + |
# Selecting the useful polygons
#
# The grid was built from the bounding box of the continental-USA polygon, so
# it contains many cells that fall entirely outside the country; keep only the
# cells that actually intersect it.

# Doing this selection in Python is not the most efficient approach, but since
# it is a one-shot preprocessing step, the cost is acceptable.

# Should the data ever become too large or too complex for this to be
# practical, tools such as QGIS, PyQGIS or GDAL would carry out this
# operation far more efficiently.

print("Starting intersect selection")
usa_polygon_grid_800m = select_polygons_by_location(grid_800m, usa_polygon)
usa_polygon_grid_800m_filename = DATA_DIR / "usa_polygon_grid_800m.gpkg"
to_geopackage(usa_polygon_grid_800m, usa_polygon_grid_800m_filename)
| 98 | + |
# ## Data processing pipeline prototype
# ### Finding the best image for each S2 tiling grid

# Full list of S2 tile ids covering the study area. Read the id column via
# S2_FEATURE_NAME_COLUMN instead of hard-coding "name", so the column is
# configured in a single place.
s2_tile_grid_list = s2_grid[S2_FEATURE_NAME_COLUMN].to_list()

###
# Finding the best products for our subset use case
###

# `sentinel2_tiling_grid_column` is the name of the column in `s2_grid` where
# the ids of the different tiles are found.
#
# `vector_features_column` is the name of the column in which the best results
# will be stored.

# Initiating our client
best_products_client = BestProductsForFeatures(
    sentinel2_tiling_grid=s2_grid,
    sentinel2_tiling_grid_column=S2_FEATURE_NAME_COLUMN,
    vector_features=usa_polygon_grid_800m_filename,
    vector_features_column=VECTOR_COLUMN_NAME,
    max_cloud_cover=MAX_CLOUD_COVER,
)
| 122 | + |
# Executing the search
#
# This search looks only for complete products, meaning products with less than
# 5 percent of nodata.

best_products_client.create_date_ranges(START_YEAR, END_YEAR, START_MONTH, END_MONTH)
products = best_products_client.find_best_complete_products()

# Selecting the best products for each vector tile
# This step is necessary as some of our vector polygons can be within multiple S2 tiles.
# The best available S2 tile is therefore selected for each vector polygon.

best_results_path = DATA_DIR / "vector_tiles_with_s2tiles_subset.gpkg"
best_results = best_products_client.select_best_products_per_feature()
# Reuse `best_results_path` instead of rebuilding the same path literal a second time.
to_geopackage(best_results, best_results_path)

# Writing the search results to file
best_products_client.to_file()
0 commit comments