From f957136cdfb7771fa3acf2ef4ef5b0d2881eff3e Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 09:56:02 +0100 Subject: [PATCH 01/10] [sc-124814] Add brotli compression --- code-env/python/spec/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index b443370..5f763dd 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -1,2 +1,4 @@ jsonpath-ng==1.5.3 requests_ntlm==1.1.0 +requests==2.26.0 +Brotli==1.0.9 From 9b2109de15d72f490e182d57f0818347ca45e867 Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 09:58:40 +0100 Subject: [PATCH 02/10] [sc-124815] Use requests.session --- python-lib/rest_api_client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-lib/rest_api_client.py b/python-lib/rest_api_client.py index bd173d6..e3998be 100644 --- a/python-lib/rest_api_client.py +++ b/python-lib/rest_api_client.py @@ -91,6 +91,7 @@ def __init__(self, credential, endpoint, custom_key_values={}): self.requests_kwargs.update({"json": get_dku_key_values(key_value_body)}) self.metadata = {} self.call_number = 0 + self.session = requests.Session() def set_login(self, credential): login_type = credential.get("login_type", "no_auth") @@ -171,12 +172,12 @@ def request(self, method, url, can_raise_exeption=True, **kwargs): def request_with_redirect_retry(self, method, url, **kwargs): # In case of redirection to another domain, the authorization header is not kept # If redirect_auth_header is true, another attempt is made with initial headers to the redirected url - response = requests.request(method, url, **kwargs) + response = self.session.request(method, url, **kwargs) if self.redirect_auth_header and not response.url.startswith(url): redirection_kwargs = copy.deepcopy(kwargs) redirection_kwargs.pop("params", None) # params are contained in the redirected url 
logger.warning("Redirection ! Accessing endpoint {} with initial authorization headers".format(response.url)) - response = requests.request(method, response.url, **redirection_kwargs) + response = self.session.request(method, response.url, **redirection_kwargs) return response def paginated_api_call(self, can_raise_exeption=True): From 13a06f4c8ccb6ee8a224fffca5d70f8b41d484bb Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 10:07:07 +0100 Subject: [PATCH 03/10] Plugin version as parameter --- custom-recipes/api-connect/recipe.py | 4 +++- python-connectors/api-connect_dataset/connector.py | 2 +- python-lib/dku_constants.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/custom-recipes/api-connect/recipe.py b/custom-recipes/api-connect/recipe.py index 97d6a29..019ff66 100644 --- a/custom-recipes/api-connect/recipe.py +++ b/custom-recipes/api-connect/recipe.py @@ -5,6 +5,7 @@ from safe_logger import SafeLogger from dku_utils import get_dku_key_values, get_endpoint_parameters from rest_api_recipe_session import RestApiRecipeSession +from dku_constants import DKUConstants logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) @@ -24,7 +25,8 @@ def get_partitioning_keys(id_list, dku_flow_variables): partitioning_keys[dimension] = dku_flow_variables.get(dimension_src) return partitioning_keys -logger.info('API-Connect plugin recipe v1.1.2') + +logger.info('API-Connect plugin recipe v{}'.format(DKUConstants.PLUGIN_VERSION)) input_A_names = get_input_names_for_role('input_A_role') config = get_recipe_config() diff --git a/python-connectors/api-connect_dataset/connector.py b/python-connectors/api-connect_dataset/connector.py index a1212d3..584950a 100644 --- a/python-connectors/api-connect_dataset/connector.py +++ b/python-connectors/api-connect_dataset/connector.py @@ -14,7 +14,7 @@ class RestAPIConnector(Connector): def __init__(self, config, plugin_config): Connector.__init__(self, config, plugin_config) # 
pass the parameters to the base class - logger.info('API-Connect plugin connector v1.1.2') + logger.info('API-Connect plugin connector v{}'.format(DKUConstants.PLUGIN_VERSION)) logger.info("config={}".format(logger.filter_secrets(config))) endpoint_parameters = get_endpoint_parameters(config) credential = config.get("credential", {}) diff --git a/python-lib/dku_constants.py b/python-lib/dku_constants.py index 7d62cf5..e7beb64 100644 --- a/python-lib/dku_constants.py +++ b/python-lib/dku_constants.py @@ -2,3 +2,4 @@ class DKUConstants(object): API_RESPONSE_KEY = "api_response" RAW_BODY_FORMAT = "RAW" FORM_DATA_BODY_FORMAT = "FORM_DATA" + PLUGIN_VERSION = "1.1.3-beta.1" From a310d549608c4bb259f7164a36a731e6988b90da Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 10:12:06 +0100 Subject: [PATCH 04/10] Fix typo --- custom-recipes/api-connect/recipe.py | 2 +- python-connectors/api-connect_dataset/connector.py | 2 +- python-lib/rest_api_client.py | 2 +- python-lib/rest_api_recipe_session.py | 2 +- python-lib/safe_logger.py | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/custom-recipes/api-connect/recipe.py b/custom-recipes/api-connect/recipe.py index 019ff66..b758f3d 100644 --- a/custom-recipes/api-connect/recipe.py +++ b/custom-recipes/api-connect/recipe.py @@ -8,7 +8,7 @@ from dku_constants import DKUConstants -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) def get_partitioning_keys(id_list, dku_flow_variables): diff --git a/python-connectors/api-connect_dataset/connector.py b/python-connectors/api-connect_dataset/connector.py index 584950a..5a37307 100644 --- a/python-connectors/api-connect_dataset/connector.py +++ b/python-connectors/api-connect_dataset/connector.py @@ -7,7 +7,7 @@ import json -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect 
plugin", forbidden_keys=["token", "password"]) class RestAPIConnector(Connector): diff --git a/python-lib/rest_api_client.py b/python-lib/rest_api_client.py index e3998be..cdd65a9 100644 --- a/python-lib/rest_api_client.py +++ b/python-lib/rest_api_client.py @@ -8,7 +8,7 @@ from dku_constants import DKUConstants -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) class RestAPIClientError(ValueError): diff --git a/python-lib/rest_api_recipe_session.py b/python-lib/rest_api_recipe_session.py index c3340ec..d27db54 100644 --- a/python-lib/rest_api_recipe_session.py +++ b/python-lib/rest_api_recipe_session.py @@ -6,7 +6,7 @@ import copy import json -logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) class RestApiRecipeSession: diff --git a/python-lib/safe_logger.py b/python-lib/safe_logger.py index d43196a..db48711 100644 --- a/python-lib/safe_logger.py +++ b/python-lib/safe_logger.py @@ -3,14 +3,14 @@ class SafeLogger(object): - def __init__(self, name, forbiden_keys=None): + def __init__(self, name, forbidden_keys=None): self.name = name self.logger = logging.getLogger(self.name) logging.basicConfig( level=logging.INFO, format='{} %(levelname)s - %(message)s'.format(self.name) ) - self.forbiden_keys = forbiden_keys + self.forbidden_keys = forbidden_keys def info(self, message): self.logger.info(message) @@ -33,7 +33,7 @@ def dig_secrets(self, dictionary): for key in dictionary: if isinstance(dictionary[key], dict): dictionary[key] = self.filter_secrets(dictionary[key]) - if key in self.forbiden_keys: + if key in self.forbidden_keys: dictionary[key] = hash(dictionary[key]) return dictionary From 501de2e5ad2f28a14db3eec208d71b96fccaf5c9 Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 10:19:20 +0100 Subject: [PATCH 05/10] 
[sc-124658] remove api_key_value from logs --- custom-recipes/api-connect/recipe.py | 2 +- python-connectors/api-connect_dataset/connector.py | 2 +- python-lib/dku_constants.py | 1 + python-lib/rest_api_client.py | 2 +- python-lib/rest_api_recipe_session.py | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/custom-recipes/api-connect/recipe.py b/custom-recipes/api-connect/recipe.py index b758f3d..9d6df39 100644 --- a/custom-recipes/api-connect/recipe.py +++ b/custom-recipes/api-connect/recipe.py @@ -8,7 +8,7 @@ from dku_constants import DKUConstants -logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) def get_partitioning_keys(id_list, dku_flow_variables): diff --git a/python-connectors/api-connect_dataset/connector.py b/python-connectors/api-connect_dataset/connector.py index 5a37307..5e418ec 100644 --- a/python-connectors/api-connect_dataset/connector.py +++ b/python-connectors/api-connect_dataset/connector.py @@ -7,7 +7,7 @@ import json -logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) class RestAPIConnector(Connector): diff --git a/python-lib/dku_constants.py b/python-lib/dku_constants.py index e7beb64..14ef417 100644 --- a/python-lib/dku_constants.py +++ b/python-lib/dku_constants.py @@ -3,3 +3,4 @@ class DKUConstants(object): RAW_BODY_FORMAT = "RAW" FORM_DATA_BODY_FORMAT = "FORM_DATA" PLUGIN_VERSION = "1.1.3-beta.1" + FORBIDDEN_KEYS = ["token", "password", "api_key_value"] diff --git a/python-lib/rest_api_client.py b/python-lib/rest_api_client.py index cdd65a9..1a62bc1 100644 --- a/python-lib/rest_api_client.py +++ b/python-lib/rest_api_client.py @@ -8,7 +8,7 @@ from dku_constants import DKUConstants -logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) +logger = SafeLogger("api-connect 
plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) class RestAPIClientError(ValueError): diff --git a/python-lib/rest_api_recipe_session.py b/python-lib/rest_api_recipe_session.py index d27db54..6882f5c 100644 --- a/python-lib/rest_api_recipe_session.py +++ b/python-lib/rest_api_recipe_session.py @@ -6,7 +6,7 @@ import copy import json -logger = SafeLogger("api-connect plugin", forbidden_keys=["token", "password"]) +logger = SafeLogger("api-connect plugin", forbidden_keys=DKUConstants.FORBIDDEN_KEYS) class RestApiRecipeSession: From 6e820897796a1cc2279ca30b58b839649e5582fa Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 10:20:07 +0100 Subject: [PATCH 06/10] reordering --- python-lib/dku_constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-lib/dku_constants.py b/python-lib/dku_constants.py index 14ef417..b8e8a09 100644 --- a/python-lib/dku_constants.py +++ b/python-lib/dku_constants.py @@ -1,6 +1,6 @@ class DKUConstants(object): API_RESPONSE_KEY = "api_response" - RAW_BODY_FORMAT = "RAW" + FORBIDDEN_KEYS = ["token", "password", "api_key_value"] FORM_DATA_BODY_FORMAT = "FORM_DATA" PLUGIN_VERSION = "1.1.3-beta.1" - FORBIDDEN_KEYS = ["token", "password", "api_key_value"] + RAW_BODY_FORMAT = "RAW" From 38c18cf10bd005cca973d6b8c5ab19183cfc74bb Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 10:27:50 +0100 Subject: [PATCH 07/10] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bdae53..227e1d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [Version 1.1.3](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.3) - Feature and bugfix release - 2023-02-28 + +- Add Brotli compression +- Faster recurring calls + ## [Version 1.1.2](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.2) - Bugfix release - 2022-10-19 - Fix for last page of RFC5988 pagination 
triggering loop condtion From b71599d58eb1e349dd3bb3925eade440780f98b5 Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Tue, 28 Feb 2023 10:27:57 +0100 Subject: [PATCH 08/10] version++ --- plugin.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin.json b/plugin.json index 52543a3..777d6f5 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "api-connect", - "version": "1.1.2", + "version": "1.1.3", "meta": { "label": "API Connect", "description": "Retrieve data from any REST API", From 11e7f1373b97ef69a17ecdfddc6452f18873bdc9 Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Wed, 1 Mar 2023 10:15:28 +0100 Subject: [PATCH 09/10] Keep same requests session for each row in recipe --- python-lib/rest_api_client.py | 14 +++++++++----- python-lib/rest_api_recipe_session.py | 7 +++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/python-lib/rest_api_client.py b/python-lib/rest_api_client.py index e3998be..1133c8d 100644 --- a/python-lib/rest_api_client.py +++ b/python-lib/rest_api_client.py @@ -17,7 +17,7 @@ class RestAPIClientError(ValueError): class RestAPIClient(object): - def __init__(self, credential, endpoint, custom_key_values={}): + def __init__(self, credential, endpoint, custom_key_values={}, session=None): logger.info("Initialising RestAPIClient, credential={}, endpoint={}".format(logger.filter_secrets(credential), endpoint)) # presets_variables contains all variables available in templates using the {{variable_name}} notation @@ -91,7 +91,7 @@ def __init__(self, credential, endpoint, custom_key_values={}): self.requests_kwargs.update({"json": get_dku_key_values(key_value_body)}) self.metadata = {} self.call_number = 0 - self.session = requests.Session() + self.session = session or requests.Session() def set_login(self, credential): login_type = credential.get("login_type", "no_auth") @@ -132,19 +132,23 @@ def request(self, method, url, can_raise_exeption=True, **kwargs): raise 
RestAPIClientError("The api-connect plugin is stuck in a loop. Please check the pagination parameters.") request_start_time = time.time() self.time_last_request = request_start_time + error_message = None try: response = self.request_with_redirect_retry(method, url, **kwargs) - request_finish_time = time.time() except Exception as err: self.pagination.is_last_batch_empty = True error_message = "Error: {}".format(err) if can_raise_exeption: raise RestAPIClientError(error_message) - else: - return {"error": error_message} + + request_finish_time = time.time() self.set_metadata("request_duration", request_finish_time - request_start_time) self.set_metadata("status_code", response.status_code) self.set_metadata("response_headers", "{}".format(response.headers)) + + if error_message: + return {"error": error_message} + if response.status_code >= 400: error_message = "Error {}: {}".format(response.status_code, response.content) self.pagination.is_last_batch_empty = True diff --git a/python-lib/rest_api_recipe_session.py b/python-lib/rest_api_recipe_session.py index c3340ec..3f91c20 100644 --- a/python-lib/rest_api_recipe_session.py +++ b/python-lib/rest_api_recipe_session.py @@ -5,6 +5,8 @@ from dku_constants import DKUConstants import copy import json +import requests + logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"]) @@ -38,6 +40,7 @@ def get_column_to_parameter_dict(parameter_columns, parameter_renamings): def process_dataframe(self, input_parameters_dataframe, is_raw_output): results = [] time_last_request = None + session = requests.Session() for index, input_parameters_row in input_parameters_dataframe.iterrows(): rows_count = 0 self.initial_parameter_columns = {} @@ -52,7 +55,7 @@ def process_dataframe(self, input_parameters_dataframe, is_raw_output): updated_endpoint_parameters, self.custom_key_values )) - self.client = RestAPIClient(self.credential_parameters, updated_endpoint_parameters, custom_key_values=self.custom_key_values) + 
self.client = RestAPIClient(self.credential_parameters, updated_endpoint_parameters, custom_key_values=self.custom_key_values, session=session) self.client.time_last_request = time_last_request while self.client.has_more_data(): page_results = self.retrieve_next_page(is_raw_output) @@ -76,7 +79,7 @@ def retrieve_next_page(self, is_raw_output): if self.can_raise: raise DataikuException(error_message) else: - return [{"error": error_message}] + return self.format_page_rows([{"error": error_message}], is_raw_output, metadata) page_rows.extend(self.format_page_rows(data_rows, is_raw_output, metadata)) else: # Todo: check api_response key is free and add something overwise From 2f2b3dbf235c1417c4fd3aa67248656da8bdbe8b Mon Sep 17 00:00:00 2001 From: Alexandre Bourret Date: Wed, 31 May 2023 11:24:02 +0200 Subject: [PATCH 10/10] v1.1.4 updates for DSS 11 --- CHANGELOG.md | 6 +++++- plugin.json | 2 +- python-lib/dku_constants.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 227e1d6..8fe3879 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,14 @@ # Changelog -## [Version 1.1.3](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.3) - Feature and bugfix release - 2023-02-28 +## [Version 1.1.4](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.4) - Feature and bugfix release - 2023-05-31 - Add Brotli compression - Faster recurring calls +## [Version 1.1.3](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.3) - Bugfix release - 2023-04-18 + +- Updated code-env descriptor for DSS 12 + ## [Version 1.1.2](https://github.com/dataiku/dss-plugin-api-connect/releases/tag/v1.1.2) - Bugfix release - 2022-10-19 - Fix for last page of RFC5988 pagination triggering loop condtion diff --git a/plugin.json b/plugin.json index 777d6f5..28b78a4 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "api-connect", - "version": "1.1.3", + "version": "1.1.4", "meta": { 
"label": "API Connect", "description": "Retrieve data from any REST API", diff --git a/python-lib/dku_constants.py b/python-lib/dku_constants.py index b8e8a09..e0dc24e 100644 --- a/python-lib/dku_constants.py +++ b/python-lib/dku_constants.py @@ -2,5 +2,5 @@ class DKUConstants(object): API_RESPONSE_KEY = "api_response" FORBIDDEN_KEYS = ["token", "password", "api_key_value"] FORM_DATA_BODY_FORMAT = "FORM_DATA" - PLUGIN_VERSION = "1.1.3-beta.1" + PLUGIN_VERSION = "1.1.4" RAW_BODY_FORMAT = "RAW"