Skip to content

Commit 91bb3c7

Browse files
authored
Merge pull request #28 from FrontierDevelopmentLab/bugfix/boundless-window
Remove the separate download step for JP2 assets. GeoTIFF and JP2 assets now go through the same extraction function, using rasterio (rio). Remove the GDAL dependency.
2 parents 87f82aa + a37b285 commit 91bb3c7

File tree

2 files changed

+3
-112
lines changed

2 files changed

+3
-112
lines changed

providers/gcp/main.py

-42
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import base64
22
import datetime
3-
import io
43
import json
54
import logging
65
import os
@@ -13,7 +12,6 @@
1312
import pystac
1413
from flask import Flask
1514
from flask import request
16-
from google.cloud import storage
1715
from loguru import logger
1816
from satextractor.extractor import task_mosaic_patches
1917
from satextractor.models import BAND_INFO
@@ -35,45 +33,6 @@
3533
app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
3634

3735

38-
def get_bucket_name(url: str) -> str:
39-
"""Get the bucket for an url like:
40-
'gs://gcp-public-data-sentinel-2/
41-
Args:
42-
url (str): The gs url
43-
Returns:
44-
str: the bucket name
45-
"""
46-
47-
return url.split("/")[2]
48-
49-
50-
def get_blob_name(url: str) -> str:
51-
"""Get the blob for an url like:
52-
'gs://gcp-public-data-sentinel-2/tiles/17/Q/QV/S2B_MSIL1C.jp2'
53-
Args:
54-
url (str): The gs url
55-
Returns:
56-
str: the blob name
57-
"""
58-
return "/".join(url.split("/")[3:])
59-
60-
61-
def download_blob(url: str) -> io.BytesIO:
62-
"""Download a blob as bytes
63-
Args:
64-
url (str): the url to download
65-
Returns:
66-
io.BytesIO: the content as bytes
67-
"""
68-
storage_client = storage.Client()
69-
bucket_name = get_bucket_name(url)
70-
source_blob_name = get_blob_name(url)
71-
bucket = storage_client.bucket(bucket_name)
72-
blob = bucket.blob(source_blob_name)
73-
f = io.BytesIO(blob.download_as_bytes())
74-
return f
75-
76-
7736
def format_stacktrace():
7837
parts = ["Traceback (most recent call last):\n"]
7938
parts.extend(traceback.format_stack(limit=25)[:-2])
@@ -155,7 +114,6 @@ def extract_patches():
155114

156115
patches = task_mosaic_patches(
157116
cloud_fs=fs,
158-
download_f=download_blob,
159117
task=task,
160118
method="max",
161119
resolution=archive_resolution,

src/satextractor/extractor/extractor.py

+3-70
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import os
22
from typing import Any
3-
from typing import Callable
43
from typing import List
54
from typing import Tuple
65

76
import numpy as np
87
import rasterio
98
from affine import Affine
109
from loguru import logger
11-
from osgeo import gdal
12-
from osgeo import osr
1310
from rasterio import warp
1411
from rasterio.crs import CRS
1512
from rasterio.enums import Resampling
@@ -85,7 +82,7 @@ def get_tile_pixel_coords(tiles: List[Tile], raster_file: str) -> List[Tuple[int
8582
return list(zip(rows, cols))
8683

8784

88-
def download_and_extract_tiles_window_COG(
85+
def download_and_extract_tiles_window(
8986
fs: Any,
9087
task: ExtractionTask,
9188
resolution: int,
@@ -148,69 +145,8 @@ def download_and_extract_tiles_window_COG(
148145
return outfiles
149146

150147

151-
def download_and_extract_tiles_window(
152-
download_f: Callable,
153-
task: ExtractionTask,
154-
resolution: int,
155-
) -> List[str]:
156-
"""Download and extract from the task assets the window bounding the tiles.
157-
i.e a crop of the original assets will
158-
159-
Args:
160-
download_f (Callable): The download function to use. It should return a BytesIO
161-
to read the content.
162-
task (ExtractionTask): The extraction task
163-
resolution (int): The target resolution
164-
165-
Returns:
166-
List[str]: A list of files that store the crops of the original assets
167-
"""
168-
band = task.band
169-
urls = [item.assets[band].href for item in task.item_collection.items]
170-
171-
epsg = task.tiles[0].epsg
172-
out_files = []
173-
for i, url in enumerate(urls):
174-
content = download_f(url)
175-
176-
gdal.FileFromMemBuffer(f"/vsimem/{task.task_id}_content", content.read())
177-
d = gdal.Open(f"/vsimem/{task.task_id}_content", gdal.GA_Update)
178-
179-
proj = osr.SpatialReference(wkt=d.GetProjection())
180-
proj = proj.GetAttrValue("AUTHORITY", 1)
181-
d = None
182-
183-
proj_win = get_proj_win(task.tiles)
184-
185-
if int(proj) != epsg:
186-
file = gdal.Warp(
187-
f"{task.task_id}_warp.vrt",
188-
f"/vsimem/{task.task_id}_content",
189-
dstSRS=f"EPSG:{epsg}",
190-
creationOptions=["QUALITY=100", "REVERSIBLE=YES"],
191-
)
192-
else:
193-
file = f"/vsimem/{task.task_id}_content"
194-
195-
out_f = f"{task.task_id}_{i}.jp2"
196-
gdal.Translate(
197-
out_f,
198-
file,
199-
projWin=proj_win,
200-
projWinSRS=f"EPSG:{epsg}",
201-
xRes=resolution,
202-
yRes=-resolution,
203-
resampleAlg="bilinear",
204-
creationOptions=["QUALITY=100", "REVERSIBLE=YES"],
205-
)
206-
file = None
207-
out_files.append(out_f)
208-
return out_files
209-
210-
211148
def task_mosaic_patches(
212149
cloud_fs: Any,
213-
download_f: Callable,
214150
task: ExtractionTask,
215151
method: str = "max",
216152
resolution: int = 10,
@@ -219,7 +155,7 @@ def task_mosaic_patches(
219155
"""Get tile patches from the mosaic of a given task
220156
221157
Args:
222-
download_f (Callable): The function to download the task assets
158+
cloud_fs (Any): the cloud_fs to access the files
223159
task (ExtractionTask): The task
224160
method (str, optional): The method to use while merging the assets. Defaults to "max".
225161
resolution (int, optional): The target resolution. Defaults to 10.
@@ -229,10 +165,7 @@ def task_mosaic_patches(
229165
List[np.ndarray]: The tile patches as numpy arrays
230166
"""
231167

232-
if task.constellation == "sentinel-2":
233-
out_files = download_and_extract_tiles_window(download_f, task, resolution)
234-
else:
235-
out_files = download_and_extract_tiles_window_COG(cloud_fs, task, resolution)
168+
out_files = download_and_extract_tiles_window(cloud_fs, task, resolution)
236169

237170
out_f = f"{task.task_id}_{dst_path}"
238171
datasets = [rasterio.open(f) for f in out_files]

0 commit comments

Comments
 (0)