Skip to content

Commit d306ae5

Browse files
panford-idZeLynxy
andauthored
Merge develop to main (#308)
* feat: add support dialog * feat: add support dialog module * fix: linting and formatting * Add: footer disclaimer text * Feat: add trademark disclaimer * Fix: linting and formatting * Add: disclaimer to task monitor * add: padding between task monitor pager and disclaimer window * add: padding between task monitor pager and disclaimer window * feat: enabled cloud masking in app component * fixed collate funcs test * defined prediction no data val as constant * Add: disclaimer to task monitor * add: padding between task monitor pager and disclaimer window * add: padding between task monitor pager and disclaimer window * feat: enabled cloud masking in app component * fixed collate funcs test * defined prediction no data val as constant --------- Co-authored-by: Iffanice Houndayi <iffanicehoundayi@gmail.com>
1 parent d605ee4 commit d306ae5

18 files changed

Lines changed: 174 additions & 46 deletions

File tree

instageo/data/raster_chip_creator.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,10 @@
3333
from instageo.data.flags import FLAGS # Import flags from central location
3434
from instageo.data.hls_utils import HLSRasterPipeline, add_hls_stac_items
3535
from instageo.data.s2_utils import S2RasterPipeline, add_s2_stac_items
36-
from instageo.data.settings import (
37-
HLSAPISettings,
38-
HLSBandsSettings,
39-
HLSBlockSizes,
40-
NoDataValues,
41-
S2APISettings,
42-
)
36+
from instageo.data.settings import HLSAPISettings, S2APISettings
4337
from instageo.data.stac_utils import create_records_with_items
4438

4539
# Create instances of the settings classes
46-
NO_DATA_VALUES = NoDataValues()
47-
HLS_BLOCKSIZE = HLSBlockSizes()
48-
HLS_BANDS = HLSBandsSettings()
4940
HLS_API = HLSAPISettings()
5041
S2_API = S2APISettings()
5142

instageo/data/settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,5 @@ class DataPipelineSettings(BaseSettings):
228228
BATCH_SIZE: int = 16 # Number of records to process at a time
229229
METADATA_SEARCH_RATELIMIT: int = 10 # Number of metadata searches per minute
230230
COG_DOWNLOAD_RATELIMIT: int = 30 # Number of COG downloads per minute
231+
HLS_SPATIAL_RESOLUTION: float = 0.0002694945852358564
232+
S2_SPATIAL_RESOLUTION: float = 8.983152841195215e-05

instageo/model/configs/config_dataclasses.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class ModelInfo(BaseModel):
2323
num_steps: int = 1
2424
temporal_step: int = 0
2525
model_description: str = ""
26+
no_data_value: int = 0
2627

2728

2829
class ModelEnum(str, Enum):

instageo/model/infer_utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def chip_inference(
8989

9090
with torch.no_grad():
9191
with ThreadPoolExecutor(max_workers=num_workers) as executor:
92-
for (data, _), file_names in tqdm(dataloader, desc="Running Inference"):
92+
for (data, _), file_names, nan_mask in tqdm(dataloader, desc="Running Inference"):
9393
data = data.to(device)
9494
prediction_batch = model(data)
9595

@@ -100,6 +100,10 @@ def chip_inference(
100100
torch.argmax(prediction_batch, dim=1).cpu().numpy().astype(np.int8)
101101
)
102102

103+
# Mask out the predictions where the chip had no_data_value
104+
nan_mask = np.all(nan_mask, axis=1).astype(int)
105+
prediction_batch = np.where(nan_mask == 1, -1, prediction_batch)
106+
103107
profiles = []
104108
for file_name in file_names:
105109
with rasterio.open(file_name) as src:
@@ -109,6 +113,7 @@ def chip_inference(
109113
dtype=rasterio.int8
110114
if prediction_batch.dtype == np.int8
111115
else rasterio.float32,
116+
nodata=-1,
112117
)
113118
profiles.append(profile)
114119

instageo/model/pipeline_utils.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,22 @@ def eval_collate_fn(batch: tuple[torch.Tensor]) -> tuple[torch.Tensor, torch.Ten
8989
return data, labels
9090

9191

92-
def infer_collate_fn(batch: tuple[torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
92+
def infer_collate_fn(
93+
batch: tuple[torch.Tensor],
94+
) -> tuple[tuple[torch.Tensor, torch.Tensor], List[str], np.ndarray]:
9395
"""Inference DataLoader Collate Function.
9496
9597
Args:
9698
batch (Tuple[Tensor]): A list of ((features, labels), filepath, nan_mask) items, one per sample.
9799
98100
Returns:
99-
Tuple of (x,y) concatenated into separate tensors
101+
Tuple of ((x,y), filepaths, nan_mask)
100102
"""
101103
data = torch.stack([a[0][0] for a in batch], 0)
102104
labels = [a[0][1] for a in batch]
103105
filepaths = [a[1] for a in batch]
104-
return (data, labels), filepaths
106+
nan_mask = np.stack([(a[2]) for a in batch], 0)
107+
return ((data, labels), filepaths, nan_mask)
105108

106109

107110
def create_dataloader(
@@ -282,7 +285,10 @@ def create_instageo_dataset(
282285
bands=cfg.dataloader.bands,
283286
replace_label=cfg.dataloader.replace_label,
284287
reduce_to_zero=cfg.dataloader.reduce_to_zero,
285-
chip_no_data_value=cfg.dataloader.no_data_value,
288+
chip_no_data_value=max(
289+
(cfg.dataloader.no_data_value or 0), 0
290+
), # backward compatibility with old models where no_data_value was set to -9999/None
291+
# no_data_value will be set to 0 for those models
286292
label_no_data_value=cfg.train.ignore_index,
287293
constant_multiplier=cfg.dataloader.constant_multiplier,
288294
include_filenames=include_filenames,

instageo/model/registry/model_registry.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ def get_model_metadata_for_size(
6464
num_steps=model_config["dataloader"]["temporal_dim"],
6565
temporal_step=model_data.get("temporal_step", 0),
6666
model_description=model_data.get("model_description", "unknown"),
67+
no_data_value=max(
68+
(model_config["dataloader"].get("no_data_value", 0) or 0), 0
69+
), # backward compatibility with old models where no_data_value was set to -9999/None
70+
# no_data_value will be set to 0 for those models
6771
)
6872

6973
def get_model_config(

instageo/new_apps/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,10 @@ Before running the deployment script, you must configure Cloudflare Tunnels:
173173
Navigate to the root directory of the project and run:
174174
```bash
175175
# Deploy with Cloudflare tunnel
176-
./scripts/deploy.sh
176+
./scripts/deploy.sh --cloudflare
177177

178178
# Deploy without Cloudflare tunnel
179-
./scripts/deploy.sh --skip-cloudflare
179+
./scripts/deploy.sh
180180
```
181181

182182
## Monitoring

instageo/new_apps/backend/app/cog_converter.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
logger = logging.getLogger(__name__)
1515

16+
PREDICTION_NO_DATA_VALUE = -1
17+
1618

1719
class COGConverter:
1820
"""Simple service for merging TIF chips into Cloud Optimized GeoTIFFs using GDAL."""
@@ -22,14 +24,19 @@ def __init__(self) -> None:
2224
pass
2325

2426
def merge_task_files_to_cog(
25-
self, data_path: str, chip_size: int = 256, compute_seg_stats: bool = False
27+
self,
28+
data_path: str,
29+
chip_size: int = 256,
30+
compute_seg_stats: bool = False,
31+
no_data_value: int = 0,
2632
) -> Dict[str, Any]:
2733
"""Merge TIF chips and prediction results from a directory into COGs using GDAL in parallel.
2834
2935
Args:
3036
data_path: Directory containing files to merge into COG.
3137
chip_size: Size of chips (used as block size).
3238
compute_seg_stats: Whether to compute segmentation stats.
39+
no_data_value: No-data value to set in the merged chips COG (the predictions COG always uses PREDICTION_NO_DATA_VALUE).
3340
3441
Returns:
3542
Tuple of (merged_chips_cog_path, merged_predictions_cog_path)
@@ -62,13 +69,15 @@ def merge_task_files_to_cog(
6269
str(output_path / "chips_merged.tif"),
6370
chip_size,
6471
True, # Enable band selection for chips (keep only B, G, R)
72+
no_data_value,
6573
)
6674
predictions_future = executor.submit(
6775
self.merge_files_to_cog,
6876
prediction_files,
6977
str(output_path / "predictions_merged.tif"),
7078
chip_size,
7179
False, # No band selection for predictions
80+
PREDICTION_NO_DATA_VALUE,
7281
)
7382

7483
# Wait for both to complete and get results
@@ -101,6 +110,7 @@ def merge_files_to_cog(
101110
output_path: str,
102111
chip_size: int = 256,
103112
select_bands: bool = False,
113+
no_data_value: int = 0,
104114
) -> str:
105115
"""Merge TIF files from a list of files into a single COG using GDAL.
106116
@@ -109,6 +119,8 @@ def merge_files_to_cog(
109119
output_path: Path for output COG file.
110120
chip_size: Size of chips (used as block size).
111121
select_bands: If True, keep only first 3 bands (B, G, R) for RGB data.
122+
no_data_value: No-data value to set in the merged file and in the COG; it is
123+
passed to the GDAL commands as a string. Defaults to 0.
112124
113125
Returns:
114126
Path to the created COG file.
@@ -126,6 +138,10 @@ def merge_files_to_cog(
126138
"gdal_merge.py",
127139
"-o",
128140
temp_merged,
141+
"-n",
142+
str(no_data_value),
143+
"-a_nodata",
144+
str(no_data_value),
129145
*[str(f) for f in tif_files],
130146
]
131147

@@ -152,6 +168,9 @@ def merge_files_to_cog(
152168
) # Keep only first 3 bands (B, G, R)
153169
logger.info("Selecting only first 3 bands (B, G, R) for RGB data")
154170

171+
cog_cmd.extend(["-a_nodata", str(no_data_value)])
172+
logger.info(f"Setting no-data value to {no_data_value} in COG")
173+
155174
cog_cmd.extend(
156175
[
157176
"-co",

instageo/new_apps/backend/app/data_processor.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
satellite data extraction tasks. It handles folder structure, parameter mapping,
55
and integration with the task system.
66
"""
7+
78
import json
89
import logging
910
from datetime import datetime
@@ -12,10 +13,12 @@
1213

1314
from absl import flags
1415

15-
from instageo.data.raster_chip_creator import main as bbox_chip_creator
16+
from instageo.data.settings import DataPipelineSettings
1617

1718
logger = logging.getLogger(__name__)
1819

20+
DATA_PIPELINE_SETTINGS = DataPipelineSettings()
21+
1922

2023
class DataProcessor:
2124
"""Proxy class for bounding boxes data pipeline integration."""
@@ -99,6 +102,12 @@ def _prepare_pipeline_params(
99102

100103
params = {
101104
"is_bbox_feature": True,
105+
"spatial_resolution": (
106+
DATA_PIPELINE_SETTINGS.HLS_SPATIAL_RESOLUTION
107+
if parameters["data_source"] == "HLS"
108+
else DATA_PIPELINE_SETTINGS.S2_SPATIAL_RESOLUTION
109+
),
110+
"chip_size": parameters["chip_size"],
102111
"bbox_feature_path": str(bbox_file),
103112
"output_directory": str(self.data_dir),
104113
"temporal_tolerance": parameters["temporal_tolerance"],
@@ -108,6 +117,8 @@ def _prepare_pipeline_params(
108117
"cloud_coverage": parameters["cloud_coverage"],
109118
"date": parameters["date"],
110119
}
120+
if parameters["mask_cloud"]:
121+
params["mask_types"] = "cloud"
111122
return params
112123

113124
def _run_pipeline(self, params: Dict[str, Any]) -> None:
@@ -116,6 +127,8 @@ def _run_pipeline(self, params: Dict[str, Any]) -> None:
116127
Args:
117128
params: Parameters for bounding boxes data pipeline.
118129
"""
130+
from instageo.data.raster_chip_creator import main as bbox_chip_creator
131+
119132
# Build command line arguments
120133
args = ["raster_chip_creator"]
121134
for key, value in params.items():

instageo/new_apps/backend/app/main.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ class TaskCreationRequest(BaseModel):
208208
model_size: str
209209
date: str
210210
cloud_coverage: int
211+
mask_cloud: bool
211212
temporal_tolerance: int
212213

213214

@@ -287,6 +288,7 @@ async def create_task(
287288
"model_size": model_size,
288289
"date": task_request.date,
289290
"cloud_coverage": task_request.cloud_coverage,
291+
"mask_cloud": task_request.mask_cloud,
290292
"temporal_tolerance": task_request.temporal_tolerance or model_info.temporal_step,
291293
# True model parameters from registry
292294
"chip_size": model_info.chip_size,
@@ -297,6 +299,7 @@ async def create_task(
297299
"model_short_name": model_info.model_short_name,
298300
"model_name": model_info.model_name,
299301
"classes_mapping": model_info.classes_mapping,
302+
"no_data_value": model_info.no_data_value,
300303
}
301304

302305
# Create task instance (this automatically starts data processing)

0 commit comments

Comments
 (0)