Skip to content

Commit 449866e

Browse files
committed
Fix pre-commit and pylint warnings
1 parent 257cf6e commit 449866e

File tree

2 files changed

+141
-3
lines changed

2 files changed

+141
-3
lines changed

geospatial_tools/stac.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -528,9 +528,7 @@ def download_sorted_by_cloud_cover_search_results(
528528
self.downloaded_cloud_cover_sorted_assets = downloaded_search_results
529529
return downloaded_search_results
530530

531-
def download_best_cloud_cover_result(
532-
self, bands: list, base_directory: Union[str, pathlib.Path], max_notada=None
533-
) -> Asset:
531+
def download_best_cloud_cover_result(self, bands: list, base_directory: Union[str, pathlib.Path]) -> Asset:
534532
"""
535533
536534
Parameters
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
"""Build an 800 m vector grid over the contiguous USA and select, for each grid
cell, the best (least-cloudy, complete) Sentinel-2 product from the Planetary
Computer, writing intermediate and final layers to GeoPackage files."""

import geopandas as gpd

from geospatial_tools import DATA_DIR
from geospatial_tools.planetary_computer.sentinel_2 import BestProductsForFeatures
from geospatial_tools.vector import (
    create_vector_grid_parallel,
    select_polygons_by_location,
    to_geopackage,
)

###
# Base arguments
###

# Base files
USA_POLYGON_FILE = DATA_DIR / "usa_polygon_5070.gpkg"
S2_USA_GRID_FILE = DATA_DIR / "s2_grid_usa_polygon_5070.gpkg"

# Grid size for vector polygons (in CRS units — meters for EPSG:5070)
GRID_SIZE = 800
# Projection for final layers; single source of truth for the CRS used below
CRS_PROJECTION = 5070

# Name of column containing s2 tile ids
S2_FEATURE_NAME_COLUMN = "name"
# Name of column that will contain S2 tile ids that contain each vector polygon
VECTOR_COLUMN_NAME = "s2_tiles"

# Date range arguments. This will look for the months of June and July, from 2020 to 2024
START_YEAR = 2020
END_YEAR = 2024
START_MONTH = 6
END_MONTH = 7

# Max cloud cover (percent) to use as search parameter
MAX_CLOUD_COVER = 15

###
# Base data
###

# The USA polygon is based off 2018's `cb_2018_us_nation_5m` shapefile, taken from here:
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
#
# It was then processed using QGIS to keep only the contiguous states, without any islands.
#
# The Sentinel 2 grid was taken from the kml file found here:
# https://sentiwiki.copernicus.eu/web/s2-products

###
# Initial pre-processing
###

# The layers below were processed using QGIS.
#
# For the purpose of this analysis, only the contiguous lower 48 states have been
# conserved; smaller islands/land masses have also been stripped.
#
# The S2 tiling grid has been trimmed to keep only the grid cells that overlap with the
# contiguous states.
#
# Since our area of study is quite large, the `EPSG:5070` projection was chosen, as it
# covers the whole area and introduces minimal distortion while preserving area.
#

usa_polygon = gpd.read_file(USA_POLYGON_FILE)
s2_grid = gpd.read_file(S2_USA_GRID_FILE)

# Creating our grid
#
# From this, we want to create a grid of square polygons with which we will later on
# query the [Planetary Computer](https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a)
# Sentinel 2 dataset and clip the selected Sentinel 2 images.

bbox = usa_polygon.total_bounds
print("Starting processing for [create_vector_grid_parallel]")
# Derive the CRS string from CRS_PROJECTION instead of hard-coding "EPSG:5070",
# so the projection is defined in exactly one place.
grid_800m = create_vector_grid_parallel(bounding_box=bbox, grid_size=GRID_SIZE, crs=f"EPSG:{CRS_PROJECTION}")
print(f"Printing len(grid_parallel) to check if grid contains same amount of polygons : {len(grid_800m)}")

# Selecting the useful polygons
#
# Now, since our grid was created using the extent of our input polygon (continental USA),
# we need to filter out the polygons that do not intersect with it.

# Doing this in Python is not the most efficient way to do things, but since it's a
# step that shouldn't be done over and over, it's not that critical.

# If ever you need to do this step in an efficient way because the data is just too
# big or too complex, it would be better off going through QGIS, PyGQIS, GDAL or
# some other more efficient way to do this operation.

usa_polygon_grid_800m_filename = DATA_DIR / "usa_polygon_grid_800m.gpkg"
print("Starting intersect selection")
usa_polygon_grid_800m = select_polygons_by_location(grid_800m, usa_polygon)
to_geopackage(usa_polygon_grid_800m, usa_polygon_grid_800m_filename)

# ## Data processing pipeline prototype
# ### Finding the best image for each S2 tiling grid

# This is the full list of S2 grids; use the declared column-name constant rather
# than repeating the "name" literal.
s2_tile_grid_list = s2_grid[S2_FEATURE_NAME_COLUMN].to_list()

###
# Finding the best products for our subset use case
###

# `s2_feature_name_columns` is the name of the column in `s2_grid` where the id of
# the different tiles is found.
#
# `vector_column_name` is the name of the column in which the best results will be stored

# Initiating our client
best_products_client = BestProductsForFeatures(
    sentinel2_tiling_grid=s2_grid,
    sentinel2_tiling_grid_column=S2_FEATURE_NAME_COLUMN,
    vector_features=usa_polygon_grid_800m_filename,
    vector_features_column=VECTOR_COLUMN_NAME,
    max_cloud_cover=MAX_CLOUD_COVER,
)

# Executing the search
#
# This search looks only for complete products, meaning products with less than
# 5 percent of nodata.

best_products_client.create_date_ranges(START_YEAR, END_YEAR, START_MONTH, END_MONTH)
products = best_products_client.find_best_complete_products()

# Selecting the best products for each vector tile
# This step is necessary as some of our vector polygons can be within multiple S2 tiles.
# The best available S2 tile is therefore selected for each vector polygon.

best_results_path = DATA_DIR / "vector_tiles_with_s2tiles_subset.gpkg"
best_results = best_products_client.select_best_products_per_feature()
# Reuse `best_results_path` instead of rebuilding the identical path literal
# (fixes the previously unused variable).
to_geopackage(best_results, best_results_path)

# Writing the results to file
best_products_client.to_file()

0 commit comments

Comments
 (0)