From e4e4d077d729adaa0192dcff65c6bd2d7688123f Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Tue, 10 Dec 2024 13:07:41 +0100 Subject: [PATCH 01/11] Include Files API v2 and refactor to use mixins, add logging --- .gitignore | 3 + isimip_client/client.py | 312 ++++++++++++++++++++++++++++++++-------- 2 files changed, 252 insertions(+), 63 deletions(-) diff --git a/.gitignore b/.gitignore index 0351dbd..a38620b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,10 @@ __pycache__/ /htmlcov /env +/test.py +/downloads /notebooks/env /notebooks/downloads .ipynb_checkpoints + diff --git a/isimip_client/client.py b/isimip_client/client.py index 4863bba..0ccde4c 100644 --- a/isimip_client/client.py +++ b/isimip_client/client.py @@ -1,4 +1,6 @@ import hashlib +import json +import logging import time import zipfile from pathlib import Path @@ -6,6 +8,8 @@ import requests +logger = logging.getLogger(__name__) + class HTTPClient: @@ -17,7 +21,7 @@ def parse_response(self, response): response.raise_for_status() return response.json() except requests.exceptions.HTTPError as e: - print(response.content) + logger.error(response.content) raise e def get(self, url, params={}): @@ -49,11 +53,11 @@ def _build_url(self, resource_url, kwargs, pk=None): if 'list_route' in kwargs: url += kwargs.pop('list_route').rstrip('/') + '/' elif 'nested_route' in kwargs: - url += '%s/' % kwargs.pop('parent_pk') + url += '{}/'.format(kwargs.pop('parent_pk')) url += kwargs.pop('nested_route').rstrip('/') + '/' if pk: - url += '%s/' % pk + url += f'{pk}/' if 'detail_route' in kwargs: url += kwargs.pop('detail_route').rstrip('/') + '/' @@ -81,16 +85,7 @@ def destroy(self, resource_url, pk, **kwargs): return self.delete(url, pk) -class ISIMIPClient(RESTClient): - - def __init__(self, data_url='https://data.isimip.org/api/v1', files_api_url='https://files.isimip.org/api/v1', - auth=None, headers={}): - self.data_url = data_url - self.files_api_url = files_api_url - - self.base_url = data_url - self.auth = auth - self.headers = {} +class DataApiMixin: def datasets(self, **kwargs): return self.list('/datasets', **kwargs) @@ -104,50 +99,59 @@ def files(self, **kwargs): def file(self, pk, **kwargs): return self.retrieve('/files', pk, **kwargs) - def download(self, url, path=None, validate=True, extract=True): - headers = self.headers.copy() - file_name = Path(urlparse(url).path.split('/')[-1]) - file_path = (Path(path) if path else Path.cwd()) / file_name - file_path.parent.mkdir(exist_ok=True, parents=True) - if file_path.exists(): - # resume download - headers.update({'Range': f'bytes={file_path.stat().st_size}-'}) +class FilesApiMixin: - response = requests.get(url, stream=True, headers=headers) - if response.status_code == 416: - # download is complete - pass + def check(self, version): + if self.files_api_version != version: + raise RuntimeError(f'This method is only available in {version} of the Files API. 
' + f'Please set "files_api_version=\'{version}\'".') + + def post_job(self, data, uploads=None, poll=None): + logger.info(f'job submitted data={data} uploads={uploads}') + + if uploads is None: + response = requests.post(self.files_api_url, json=data, auth=self.auth, headers=self.headers) else: - response.raise_for_status() + files = {'data': json.dumps(data)} + for upload in uploads: + upload_path = Path(upload).expanduser() + files[upload_path.name] = upload_path.read_bytes() + response = requests.post(self.files_api_url, files=files, auth=self.auth, headers=self.headers) - with open(file_path, 'ab') as fd: - for chunk in response.iter_content(chunk_size=65*1024): - fd.write(chunk) - if validate: - json_url = url.rsplit('/', 1)[0] + '/' + file_name.with_suffix('.json').as_posix() - response = requests.get(json_url, headers=self.headers) - response.raise_for_status() - json_data = response.json() - remote_checksum, remote_path = json_data['checksum'], json_data['path'] + job = self.parse_response(response) + self.log_job(job) - # compute file checksum - m = hashlib.sha512() - with open(file_path, 'rb') as fp: - # read and update in blocks of 64K - for block in iter(lambda: fp.read(65536), b''): - m.update(block) - checksum = m.hexdigest() + if poll and job['status'] in ['queued', 'started']: + time.sleep(poll) + return self.get_job(job['job_url'], poll=poll) + else: + return job - assert remote_path.endswith(file_name.as_posix()) - assert remote_checksum == checksum, f'Checksum {checksum} != {remote_checksum}' + def get_job(self, job_url, poll=None): + response = requests.get(job_url, auth=self.auth, headers=self.headers) - if file_path.suffix == '.zip' and extract: - with zipfile.ZipFile(file_path, 'r') as zip_ref: - zip_ref.extractall(path) + job = self.parse_response(response) + self.log_job(job) + + if poll and job['status'] in ['queued', 'started']: + time.sleep(poll) + return self.get_job(job['job_url'], poll=poll) + else: + return job + + def log_job(self, job): + if job['status'] == 'finished': + logger.info('job {id} {status} meta={meta} file_url={file_url}'.format(**job)) + else: + logger.info('job {id} {status} meta={meta}'.format(**job)) + +class FilesApiV1Mixin: def mask(self, paths, country=None, bbox=None, landonly=None, poll=None): + self.check('v1') + payload = {} if isinstance(paths, list): @@ -164,9 +168,11 @@ def mask(self, paths, country=None, bbox=None, landonly=None, poll=None): elif landonly is not None: payload['task'] = 'mask_landonly' - return self.post_job(payload, poll) + return self.post_job(payload, poll=poll) def cutout(self, paths, bbox, poll=None): + self.check('v1') + payload = { 'task': 'cutout_bbox', 'bbox': bbox @@ -177,9 +183,11 @@ def cutout(self, paths, bbox, poll=None): else: payload['paths'] = [paths] - return self.post_job(payload, poll) + return self.post_job(payload, poll=poll) def select(self, paths, country=None, bbox=None, point=None, poll=None): + self.check('v1') + payload = {} if isinstance(paths, list): @@ -197,17 +205,195 @@ def select(self, paths, country=None, bbox=None, point=None, poll=None): payload['task'] = 'select_point' payload['point'] = point - return self.post_job(payload, poll) - - def post_job(self, payload, poll=None): - while True: - response = requests.post(self.files_api_url, json=payload, auth=self.auth, headers=self.headers) - job = self.parse_response(response) - if poll: - print('job', job['id'], job['status'], job['meta'] if job['meta'] else '') - if job['status'] in ['queued', 'started']: - time.sleep(poll) - 
else: - return job - else: - return job + return self.post_job(payload, poll=poll) + + +class FilesApiV2Mixin: + + def submit_job(self, paths, operations, uploads, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': operations + }, uploads=uploads, poll=poll) + + def select_bbox(self, paths, bbox, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'select_bbox', + 'bbox': bbox + } + ] + }, poll=poll) + + def select_point(self, paths, point, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'select_point', + 'point': point + } + ] + }, poll=poll) + + def mask_bbox(self, paths, bbox, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_bbox', + 'bbox': bbox + } + ] + }, poll=poll) + + def mask_country(self, paths, country, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_country', + 'country': country + } + ] + }, poll=poll) + + def mask_landonly(self, paths, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_landonly' + } + ] + }, poll=poll) + + def mask_mask(self, paths, mask, var, compute_mean=False, output_csv=False, poll=None): + self.check('v2') + mask = Path(mask) + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_mask', + 'mask': mask.name, + 'compute_mean': compute_mean, + 'output_csv': output_csv, + 'var': var + } + ] + }, uploads=[mask], poll=poll) + + def mask_shape(self, paths, shape, layer, compute_mean=False, output_csv=False, poll=None): + self.check('v2') + shape = Path(shape) + mask = shape.with_suffix('.nc') + var = f'm_{layer}' + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'create_mask', + 'shape': shape.name, + 'mask': mask.name, + }, + { + 'operation': 'mask_mask', + 'mask': mask.name, + 'compute_mean': compute_mean, + 'output_csv': output_csv, + 'var': var + } + ] + }, uploads=[shape], poll=poll) + + def cutout_bbox(self, paths, bbox, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'cutout_bbox', + 'bbox': bbox + } + ] + }, poll=poll) + + def cutout_point(self, paths, point, poll=None): + self.check('v2') + return self.post_job({ + 'paths': paths, + 'operations': [ + { + 'operation': 'cutout_point', + 'point': point + } + ] + }, poll=poll) + + +class DownloadMixin: + + def download(self, url, path=None, validate=False, extract=True): + headers = self.headers.copy() + + file_name = Path(urlparse(url).path.split('/')[-1]) + file_path = (Path(path) if path else Path.cwd()) / file_name + file_path.parent.mkdir(exist_ok=True, parents=True) + if file_path.exists(): + # resume download + headers.update({'Range': f'bytes={file_path.stat().st_size}-'}) + + response = requests.get(url, stream=True, headers=headers) + if response.status_code == 416: + # download is complete + pass + else: + response.raise_for_status() + + with open(file_path, 'ab') as fd: + for chunk in response.iter_content(chunk_size=65*1024): + fd.write(chunk) + + if validate: + json_url = url.rsplit('/', 1)[0] + '/' + file_name.with_suffix('.json').as_posix() + response = requests.get(json_url, headers=self.headers) + response.raise_for_status() + json_data = response.json() + remote_checksum, remote_path = json_data['checksum'], 
json_data['path'] + + # compute file checksum + m = hashlib.sha512() + with open(file_path, 'rb') as fp: + # read and update in blocks of 64K + for block in iter(lambda: fp.read(65536), b''): + m.update(block) + checksum = m.hexdigest() + + assert remote_path.endswith(file_name.as_posix()) + assert remote_checksum == checksum, f'Checksum {checksum} != {remote_checksum}' + + if file_path.suffix == '.zip' and extract: + with zipfile.ZipFile(file_path, 'r') as zip_ref: + zip_ref.extractall(path) + + +class ISIMIPClient(DataApiMixin, FilesApiMixin, FilesApiV1Mixin, FilesApiV2Mixin, DownloadMixin, RESTClient): + + def __init__(self, data_url='https://data.isimip.org/api/v1', files_api_url='https://files.isimip.org/api/v1', + files_api_version='v1', auth=None, headers={}): + self.data_url = data_url + self.files_api_url = files_api_url + self.files_api_version = files_api_version + + self.base_url = data_url + self.auth = auth + self.headers = {} From a05431d8f51519b2a9206f973bc90e7318887552 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Tue, 10 Dec 2024 18:40:54 +0100 Subject: [PATCH 02/11] Add cli client and refactor client --- .gitignore | 2 +- isimip_client/cli.py | 179 ++++++++++++++++++++++++++++++++++++++++ isimip_client/client.py | 86 +++++++++++-------- pyproject.toml | 7 +- 4 files changed, 237 insertions(+), 37 deletions(-) create mode 100644 isimip_client/cli.py diff --git a/.gitignore b/.gitignore index a38620b..5ef84a1 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ __pycache__/ /htmlcov /env -/test.py +/test* /downloads /notebooks/env /notebooks/downloads diff --git a/isimip_client/cli.py b/isimip_client/cli.py new file mode 100644 index 0000000..ce5a683 --- /dev/null +++ b/isimip_client/cli.py @@ -0,0 +1,179 @@ +import logging + +import click +from rich import print_json +from rich.logging import RichHandler + +from .client import ISIMIPClient + +logging.basicConfig(level='INFO', format='%(message)s', handlers=[RichHandler()]) + + +class SearchArgumentType(click.ParamType): + name = "search" + + def convert(self, value, param, ctx): + try: + search_key, search_value = value.split('=') + return (search_key, search_value) + except ValueError: + self.fail(f'{param} needs to be of the form key=value') + + +@click.group() +@click.pass_context +def main(ctx): + ctx.ensure_object(dict) + ctx.obj['client'] = ISIMIPClient( + data_url='https://data.isimip.org/api/v1', + files_api_url='https://files.isimip.org/api/v2', + files_api_version='v2' + ) + + +@main.result_callback() +@click.pass_context +def print_response(ctx, response, **kwargs): + if response: + if 'file_url' in response: + ctx.obj['client'].download(response['file_url'], validate=False, extract=False) + else: + print_json(data=response) + + +@main.command() +@click.pass_context +@click.argument('search', nargs=-1, type=SearchArgumentType()) +@click.option('--page', default=1) +@click.option('--page-size', default=10) +def datasets(ctx, search, **kwargs): + return ctx.obj['client'].datasets(**dict(search, **kwargs)) + + +@main.command() +@click.pass_context +@click.argument('id') +def dataset(ctx, **kwargs): + return ctx.obj['client'].dataset(**kwargs) + + +@main.command() +@click.pass_context +@click.argument('search', nargs=-1, type=SearchArgumentType()) +@click.option('--page', default=1) +@click.option('--page-size', default=10) +def files(ctx, search, **kwargs): + return ctx.obj['client'].files(**dict(search, **kwargs)) + + +@main.command() +@click.pass_context +@click.argument('id') +def file(ctx, **kwargs): 
+ return ctx.obj['client'].file(**kwargs) + + +@main.command(name='select_bbox') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--west', type=click.FLOAT, required=True) +@click.option('--east', type=click.FLOAT, required=True) +@click.option('--south', type=click.FLOAT, required=True) +@click.option('--north', type=click.FLOAT, required=True) +@click.option('--mean', type=click.BOOL, default=False) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def select_bbox(ctx, **kwargs): + return ctx.obj['client'].select_bbox(**kwargs) + + +@main.command(name='select_point') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--lat', type=click.FLOAT, required=True) +@click.option('--lon', type=click.FLOAT, required=True) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def select_point(ctx, **kwargs): + return ctx.obj['client'].select_point(**kwargs) + + +@main.command(name='mask_bbox') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--west', type=click.FLOAT, required=True) +@click.option('--east', type=click.FLOAT, required=True) +@click.option('--south', type=click.FLOAT, required=True) +@click.option('--north', type=click.FLOAT, required=True) +@click.option('--mean', type=click.BOOL, default=False) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def mask_bbox(ctx, **kwargs): + return ctx.obj['client'].mask_bbox(**kwargs) + + +@main.command(name='mask_country') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--country', type=click.STRING, required=True) +@click.option('--mean',type=click.BOOL, default=False) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def mask_country(ctx, **kwargs): + return ctx.obj['client'].mask_country(**kwargs) + + +@main.command(name='mask_landonly') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +def mask_landonly(ctx, **kwargs): + return ctx.obj['client'].mask_landonly(**kwargs) + + +@main.command(name='mask_mask') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--mask', type=click.Path(), required=True) +@click.option('--var', type=click.STRING, required=True) +@click.option('--mean',type=click.BOOL, default=False) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def mask_mask(ctx, **kwargs): + return ctx.obj['client'].mask_mask(**kwargs) + + +@main.command(name='mask_shape') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--shape', type=click.Path(), required=True) +@click.option('--layer', type=click.INT, required=True) +@click.option('--mean',type=click.BOOL, default=False) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def mask_shape(ctx, **kwargs): + return ctx.obj['client'].mask_shape(**kwargs) + + +@main.command(name='cutout_bbox') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--west', type=click.FLOAT, required=True) +@click.option('--east', type=click.FLOAT, required=True) +@click.option('--south', type=click.FLOAT, required=True) +@click.option('--north', 
type=click.FLOAT, required=True) +@click.option('--mean',type=click.BOOL, default=False) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def cutout_bbox(ctx, **kwargs): + return ctx.obj['client'].cutout_bbox(**kwargs) + + +@main.command(name='cutout_point') +@click.pass_context +@click.argument('paths', nargs=-1, type=click.STRING) +@click.option('--lat', type=click.FLOAT, required=True) +@click.option('--lon', type=click.FLOAT, required=True) +@click.option('--csv', type=click.BOOL, default=False) +@click.option('--poll', type=click.INT, default=4) +def cutout_point(ctx, **kwargs): + return ctx.obj['client'].cutout_point(**kwargs) diff --git a/isimip_client/client.py b/isimip_client/client.py index 0ccde4c..3a52c2c 100644 --- a/isimip_client/client.py +++ b/isimip_client/client.py @@ -21,8 +21,8 @@ def parse_response(self, response): response.raise_for_status() return response.json() except requests.exceptions.HTTPError as e: - logger.error(response.content) - raise e + logger.error(f'{e} response={response.json()}') + return None def get(self, url, params={}): response = requests.get(self.base_url + url, params=params, auth=self.auth, headers=self.headers) @@ -116,30 +116,36 @@ def post_job(self, data, uploads=None, poll=None): files = {'data': json.dumps(data)} for upload in uploads: upload_path = Path(upload).expanduser() - files[upload_path.name] = upload_path.read_bytes() - response = requests.post(self.files_api_url, files=files, auth=self.auth, headers=self.headers) + try: + files[upload_path.name] = upload_path.read_bytes() + except FileNotFoundError as e: + logger.error(e) + return None + response = requests.post(self.files_api_url, files=files, auth=self.auth, headers=self.headers) job = self.parse_response(response) - self.log_job(job) + if job: + self.log_job(job) - if poll and job['status'] in ['queued', 'started']: - time.sleep(poll) - return self.get_job(job['job_url'], poll=poll) - else: - return job + if poll and job['status'] in ['queued', 'started']: + time.sleep(poll) + return self.get_job(job['job_url'], poll=poll) + else: + return job def get_job(self, job_url, poll=None): response = requests.get(job_url, auth=self.auth, headers=self.headers) job = self.parse_response(response) - self.log_job(job) + if job: + self.log_job(job) - if poll and job['status'] in ['queued', 'started']: - time.sleep(poll) - return self.get_job(job['job_url'], poll=poll) - else: - return job + if poll and job['status'] in ['queued', 'started']: + time.sleep(poll) + return self.get_job(job['job_url'], poll=poll) + else: + return job def log_job(self, job): if job['status'] == 'finished': @@ -217,50 +223,57 @@ def submit_job(self, paths, operations, uploads, poll=None): 'operations': operations }, uploads=uploads, poll=poll) - def select_bbox(self, paths, bbox, poll=None): + def select_bbox(self, paths, west, east, south, north, mean=False, csv=False, poll=None): self.check('v2') return self.post_job({ 'paths': paths, 'operations': [ { 'operation': 'select_bbox', - 'bbox': bbox + 'bbox': [west, east, south, north], + 'compute_mean': mean, + 'output_csv': csv } ] }, poll=poll) - def select_point(self, paths, point, poll=None): + def select_point(self, paths, lat, lon, csv=False, poll=None): self.check('v2') return self.post_job({ 'paths': paths, 'operations': [ { 'operation': 'select_point', - 'point': point + 'point': [lat, lon], + 'output_csv': csv } ] }, poll=poll) - def mask_bbox(self, paths, bbox, poll=None): + def 
mask_bbox(self, paths, west, east, south, north, mean=False, csv=False, poll=None): self.check('v2') return self.post_job({ 'paths': paths, 'operations': [ { 'operation': 'mask_bbox', - 'bbox': bbox + 'bbox': [west, east, south, north], + 'compute_mean': mean, + 'output_csv': csv } ] }, poll=poll) - def mask_country(self, paths, country, poll=None): + def mask_country(self, paths, country, mean=False, csv=False, poll=None): self.check('v2') return self.post_job({ 'paths': paths, 'operations': [ { 'operation': 'mask_country', - 'country': country + 'country': country, + 'compute_mean': mean, + 'output_csv': csv } ] }, poll=poll) @@ -276,7 +289,7 @@ def mask_landonly(self, paths, poll=None): ] }, poll=poll) - def mask_mask(self, paths, mask, var, compute_mean=False, output_csv=False, poll=None): + def mask_mask(self, paths, mask, var, mean=False, csv=False, poll=None): self.check('v2') mask = Path(mask) return self.post_job({ @@ -285,14 +298,14 @@ def mask_mask(self, paths, mask, var, compute_mean=False, output_csv=False, poll { 'operation': 'mask_mask', 'mask': mask.name, - 'compute_mean': compute_mean, - 'output_csv': output_csv, + 'compute_mean': mean, + 'output_csv': csv, 'var': var } ] }, uploads=[mask], poll=poll) - def mask_shape(self, paths, shape, layer, compute_mean=False, output_csv=False, poll=None): + def mask_shape(self, paths, shape, layer, mean=False, csv=False, poll=None): self.check('v2') shape = Path(shape) mask = shape.with_suffix('.nc') @@ -308,33 +321,36 @@ def mask_shape(self, paths, shape, layer, compute_mean=False, output_csv=False, { 'operation': 'mask_mask', 'mask': mask.name, - 'compute_mean': compute_mean, - 'output_csv': output_csv, + 'compute_mean': mean, + 'output_csv': csv, 'var': var } ] }, uploads=[shape], poll=poll) - def cutout_bbox(self, paths, bbox, poll=None): + def cutout_bbox(self, paths, west, east, south, north, mean=False, csv=False, poll=None): self.check('v2') return self.post_job({ 'paths': paths, 'operations': [ { 'operation': 'cutout_bbox', - 'bbox': bbox + 'bbox': [west, east, south, north], + 'compute_mean': mean, + 'output_csv': csv } ] }, poll=poll) - def cutout_point(self, paths, point, poll=None): + def cutout_point(self, paths, lat, lon, csv=False, poll=None): self.check('v2') return self.post_job({ 'paths': paths, 'operations': [ { 'operation': 'cutout_point', - 'point': point + 'point': [lat, lon], + 'output_csv': csv } ] }, poll=poll) @@ -346,7 +362,7 @@ def download(self, url, path=None, validate=False, extract=True): headers = self.headers.copy() file_name = Path(urlparse(url).path.split('/')[-1]) - file_path = (Path(path) if path else Path.cwd()) / file_name + file_path = (Path(path).expanduser() if path else Path.cwd()) / file_name file_path.parent.mkdir(exist_ok=True, parents=True) if file_path.exists(): # resume download diff --git a/pyproject.toml b/pyproject.toml index 7ff444b..e2df16d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,9 @@ classifiers = [ 'Programming Language :: Python :: 3.11', ] dependencies = [ - "requests>=2" + "click>=8", + "requests>=2", + "rich>=13" ] dynamic = ["version"] @@ -41,6 +43,9 @@ dev = [ [project.urls] Repository = "https://github.com/ISI-MIP/isimip-client" +[project.scripts] +isimip-client = "isimip_client.cli:main" + [tool.setuptools] packages = ["isimip_client"] From 7ba4f374568cb0fc7e1474d754c37a7218ca5724 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 11 Dec 2024 15:55:38 +0100 Subject: [PATCH 03/11] Update README --- README.md | 148 
+++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 142 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index ba66968..0c528b3 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ isimip-client

 [![Latest release](https://shields.io/github/v/release/ISI-MIP/isimip-client)](https://github.com/ISI-MIP/isimip-client/releases)
 [![PyPI release](https://img.shields.io/pypi/v/isimip-client)](https://pypi.org/project/isimip-client/)
-[![Python Version](https://img.shields.io/badge/python->=3.8-blue)](https://www.python.org/)
+[![Python Version](https://img.shields.io/badge/python->=3.9-blue)](https://www.python.org/)
 [![License](https://img.shields.io/badge/License-MIT-green)](https://github.com/ISI-MIP/isimip-qc/blob/master/LICENSE)

 A *thin* client library to use the API of the [ISIMIP repository](https://data.isimip.org) using Python.
@@ -11,7 +11,7 @@ A *thin* client library to use the API of the [ISIMIP repository](https://data.i
 Setup
 -----

-The library is written in Python (> 3.6) uses only dependencies, which can be installed without administrator priviledges. The installation of Python (and its developing packages), however differs from operating system to operating system. Optional Git is needed if the application is installed directly from GitHub. The installation of Python 3 and Git for different plattforms is documented [here](https://github.com/ISI-MIP/isimip-utils/blob/master/docs/prerequisites.md).
+The library is written in Python (> 3.9) and uses only dependencies which can be installed without administrator privileges. The installation of Python (and its development packages), however, differs from operating system to operating system. The installation of Python 3 for different platforms is documented [here](https://github.com/ISI-MIP/isimip-utils/blob/master/docs/prerequisites.md).

 The library can be installed via pip. Usually you want to create a [virtual environment](https://docs.python.org/3/library/venv.html) first, but this is optional:

@@ -34,17 +34,22 @@ pip install isimip-client
 Usage
 -----

-The library is used in the following way:
+The package provides the Python class `ISIMIPClient`, which can be used in scripts or notebooks in the following way:

 ```python
 from isimip_client.client import ISIMIPClient
+
 client = ISIMIPClient()
+```

+The methods of this `client` object can then be used to perform queries to the ISIMIP Repository, e.g. to search for datasets:
+
+```python
 # search the ISIMIP repository using a search string
 response = client.datasets(query='gfdl-esm4 ssp370 pr')

 # search the ISIMIP repository for a specific subtree
-response = client.datasets(tree='ISIMIP3b/InputData/climate/atmosphere/global/daily/ssp370/gfdl-esm4/r1i1p1f1/w5e5/pr')
+response = client.datasets(path='ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp370/GFDL-ESM4/')

 # search the ISIMIP repository using specifiers
 response = client.datasets(simulation_round='ISIMIP3b',
@@ -54,15 +59,146 @@ response = client.datasets(simulation_round='ISIMIP3b',
                            climate_variable='pr')
 ```

-In order to use the `dev` version of the repository use:
+The response object is a dictionary of the form
+
+```python
+{
+    "count": 1001,
+    "next": "https://data.isimip.org/api/v1/datasets/?page=2&...",
+    "previous": null,
+    "results": [
+        ...
+    ]
+}
+```
+
+where each result contains the information for one dataset matching the provided search criteria.
+By default, only 10 datasets are returned and you can access the next 10 by providing `page=2` to the `datasets` method. You can also use `page_size=N` to increase the number of returned results per page.
+
+Similar searches can be performed on the `files` endpoint, e.g.:
+
+```python
+response = client.files(...)
+```
+
+The ISIMIP Repository provides a "Configure download" feature, which can be used to perform operations on a set of files before downloading them. A common use case is the cut-out of a specific region. Technical details about this Files API can be found [here](https://github.com/ISI-MIP/isimip-files-api). The client can be used to perform the same operations which are available on the webpage:
+
+```python
+response = client.select_bbox(paths, west, east, south, north, poll=poll)
+
+response = client.select_point(paths, lat, lon, poll=poll)
+
+response = client.mask_bbox(paths, west, east, south, north, poll=poll)
+
+response = client.mask_country(paths, country, poll=poll)
+
+response = client.mask_landonly(paths, poll=poll)
+
+response = client.mask_mask(paths, mask, var, poll=poll)
+
+response = client.mask_shape(paths, shapefile, layer, poll=poll)
+
+response = client.mask_shape(paths, geojson, layer, poll=poll)
+
+response = client.cutout_bbox(paths, west, east, south, north, poll=poll)
+
+response = client.cutout_point(paths, lat, lon, poll=poll)
+
+# in order to download the created zip file, the download method can be used
+client.download(response['file_url'], path='downloads')
+```
+
+In addition, the client allows using the API with a custom list of operations. In order to first cut out a rectangular area from the CHELSA high resolution data and then cut out a shape from a shapefile, you can use:

 ```python
-client = ISIMIPClient(data_url='https://dev.isimip.org/api/v1', auth=(USER, PASS))
+# Admin 0 - Countries from https://www.naturalearthdata.com
+ne_shape = Path('~/data/isimip/shapes/ne_10m_admin_0_sovereignty.zip')
+ne_mask = ne_shape.with_suffix('.nc')
+
+# ISIMIP3a high resolution precipitation input data
+paths = [
+    'ISIMIP3a/InputData/climate/atmosphere/obsclim/global/daily/historical/CHELSA-W5E5/chelsa-w5e5_obsclim_pr_30arcsec_global_daily_201612.nc',
+    ...
+]
+
+# chain of operations
+operations = [
+    {
+        'operation': 'cutout_bbox',
+        'bbox': [
+            5.800,   # west
+            10.600,  # east
+            45.800,  # south
+            47.900   # north
+        ]
+    },
+    {
+        'operation': 'create_mask',
+        'shape': ne_shape.name,
+        'mask': ne_mask.name,
+    },
+    {
+        'operation': 'mask_mask',
+        'mask': ne_mask.name,
+        'var': 'm_91'  # Switzerland, layer 91 in the shapefile
+    }
+]
+
+# list of uploaded files, referenced in the operations list
+uploads = [ne_shape]
+
+# submit the prepared job to the API and poll every 4 seconds for its status
+response = client.submit_job(paths, operations, uploads, poll=4)
+```
+
+Before 2025, the Files API was only available in its first version, which can still be used:
+
+```python
+client = ISIMIPClient(files_api_url='https://files.isimip.org/api/v1', files_api_version='v1')
+
+client.select(paths, bbox=[south, north, west, east])
+client.select(paths, point=(lat, lon))
+client.select(paths, country=country)
+
+client.mask(paths, bbox=[south, north, west, east])
+client.mask(paths, country=country)
+client.mask(paths, landonly=True)
+
+client.cutout(paths, bbox=[south, north, west, east])
 ```

 More examples can be found in the [notebooks directory](/notebooks).
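
+The v1 methods accept the same `poll` argument as the v2 methods and return a job dictionary once the job has finished. A minimal sketch of polling a v1 job and downloading its result (assuming the `status` and `file_url` fields shown above):
+
+```python
+# poll the Files API every 10 seconds until the job is finished
+response = client.mask(paths, country='bra', poll=10)
+
+# download and extract the resulting zip file
+if response and response['status'] == 'finished':
+    client.download(response['file_url'], path='downloads', validate=False, extract=True)
+```
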
+Command line client +------------------- + +Most features of the client can also be used on the command line using the `isimip-client` command, e.g.: + +```bash +isimip-client select_bbox [PATHS]... --west=-20 --east=20 --south=-10 --north=10 + +isimip-client select_point [PATHS]... --lat=6.25 --lon=18.17 + +isimip-client mask_bbox [PATHS]... --west=-20 --east=20 --south=-10 --north=10 + +isimip-client mask_country [PATHS]... --country=bra + +isimip-client mask_landonly [PATHS]... + +isimip-client mask_mask [PATHS]... --mask=~/data/isimip/api/countrymasks.nc --var=m_AUS + +isimip-client mask_shape [PATHS]... --shape=~/data/isimip/shapes/World_Continents.zip --layer=3 + +isimip-client mask_shape [PATHS]... --shape=~/data/isimip/shapes/World_Continents.geojson --layer=4 + +isimip-client cutout_bbox [PATHS]... --west=-20 --east=20 --south=-10 --north=10 + +isimip-client cutout_point [PATHS]... --lat=6.25 --lon=18.17 +``` + +where `[PATHS]...` denotes the list of ISIMIP file path to process, seperated by spaces. + + Jupyter notebooks ----------------- From 51aa6f5226a580fd8483ab030facb29a362fbe6c Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 11 Dec 2024 15:56:34 +0100 Subject: [PATCH 04/11] Refactor notebooks --- notebooks/cutout.ipynb | 133 ------------------- notebooks/datasets.ipynb | 114 ---------------- notebooks/files-api.ipynb | 237 ++++++++++++++++++++++++++++++++++ notebooks/files.ipynb | 123 ------------------ notebooks/mask.ipynb | 185 -------------------------- notebooks/repository.ipynb | 147 +++++++++++++++++++++ notebooks/select-points.ipynb | 117 ----------------- notebooks/select.ipynb | 198 ---------------------------- 8 files changed, 384 insertions(+), 870 deletions(-) delete mode 100644 notebooks/cutout.ipynb delete mode 100644 notebooks/datasets.ipynb create mode 100644 notebooks/files-api.ipynb delete mode 100644 notebooks/files.ipynb delete mode 100644 notebooks/mask.ipynb create mode 100644 notebooks/repository.ipynb delete mode 100644 notebooks/select-points.ipynb delete mode 100644 notebooks/select.ipynb diff --git a/notebooks/cutout.ipynb b/notebooks/cutout.ipynb deleted file mode 100644 index 0c64e16..0000000 --- a/notebooks/cutout.ipynb +++ /dev/null @@ -1,133 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install isimip-client from GitHub\n", - "!pip install isimip-client\n", - "# install dependecies for plotting\n", - "!pip install netCDF4 matplotlib pandas" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from isimip_client.client import ISIMIPClient\n", - "client = ISIMIPClient()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# to cut out a bounding box in lat/lon use\n", - "# run this subsequently to poll the status\n", - "path = 'ISIMIP3a/SecondaryInputData/climate/atmosphere/obsclim/global/daily/historical/CHELSA-W5E5v1.0/chelsa-w5e5v1.0_obsclim_tas_30arcsec_global_daily_201601.nc'\n", - "response = client.cutout(path, bbox=[-45.108, -41.935, 167.596, 173.644])\n", - "response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# once the status is 'finished', get the url to download the result\n", - "client.download(response['file_url'], path='downloads', validate=False, extract=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "# this checking can be automated using poll=