From e39ec2c8b69d4fdf7b36cf4004607a89d2b2bc28 Mon Sep 17 00:00:00 2001
From: Clement Stenac
Date: Thu, 30 Apr 2020 10:24:29 +0200
Subject: [PATCH] API for data snapshots experiment

---
Review notes (this section is commentary, not part of the diff):
- Line structure of the patch was restored and docstrings were added; hunk
  sizes and the diffstat were updated accordingly, but the post-image blob
  ids on the "index" lines were NOT recomputed.
- SnapshotBuilder.add_dataset now reads DSSDataset.dataset_name, the attribute
  used everywhere else in this patch, instead of DSSDataset.name.
- TODO confirm: project.py needs DSSDataset and DSSFuture in scope; its import
  block is not part of this patch.
- TODO confirm: neither restore method visibly sends the target dataset name
  to the backend (restore_snapshot ignores target_name).

 dataikuapi/dss/dataset.py | 88 +++++++++++++++++++++++++++++++++++++++
 dataikuapi/dss/project.py | 54 ++++++++++++++++++++++++
 2 files changed, 142 insertions(+)

diff --git a/dataikuapi/dss/dataset.py b/dataikuapi/dss/dataset.py
index 2a8ee541..4fc41484 100644
--- a/dataikuapi/dss/dataset.py
+++ b/dataikuapi/dss/dataset.py
@@ -603,6 +603,94 @@ def new_recipe(self, type, recipe_name=None):
         builder.with_input(self.dataset_name)
         return builder
 
+    def list_snapshots(self):
+        """
+        Lists the data snapshots of the project containing data for this dataset
+
+        :returns: a list of :class:`DSSDatasetSnapshot`, one per snapshot returned by the backend
+        :rtype: list
+        """
+        snapshots = self.client._perform_json("GET",
+                "/projects/%s/datasets/%s/snapshots"% (self.project_key, self.dataset_name))
+
+        return [DSSDatasetSnapshot(self, snapshot) for snapshot in snapshots]
+
+    def restore_snapshot(self, snapshot_id, target_name, target_settings):
+        """
+        Restores data of this dataset from a snapshot, using the given creation settings.
+
+        Returns a future to wait for the restore task to complete
+
+        :param str snapshot_id: id of the snapshot to restore from
+        :param str target_name: name of the dataset to create. NOTE(review): this value is not
+            part of the request sent by this method - confirm how the backend names the target
+        :param dict target_settings: sent as the "creationSettings" of the request
+        :rtype: `dataikuapi.dss.future.DSSFuture`
+        """
+        future_response = self.client._perform_json("POST",
+            "/projects/%s/datasets/%s/snapshots/%s/actions/restoreToNew"% (self.project_key, self.dataset_name, snapshot_id),
+            body = {"creationSettings" : target_settings})
+
+        return DSSFuture(self.client, future_response.get('jobId', None), future_response)
+
+class DSSDatasetSnapshot(object):
+    """
+    A reference to a data snapshot of a project containing data for a particular dataset.
+    Do not instantiate this class, use :meth:`DSSDataset.list_snapshots`
+    """
+    def __init__(self, dataset, snapshot):
+        # dataset: the DSSDataset handle this snapshot was listed from
+        self.dataset = dataset
+        # snapshot: the raw snapshot description (dict) returned by the backend
+        self.snapshot = snapshot
+
+    @property
+    def id(self):
+        """Snapshot id"""
+        return self.snapshot["bundleId"]
+
+    @property
+    def type(self):
+        """
+        Type of this data snapshot. Either DATA_SNAPSHOT for a pure data snapshot or BUNDLE for a
+        full project bundle
+        """
+        return self.snapshot["exportManifest"]["exportType"]
+
+    @property
+    def git_commit_info(self):
+        """
+        Details about the last commit of the project prior to the snapshot, if available
+
+        Returns a dict, that may be empty if information is not available
+        """
+        return self.snapshot["exportManifest"].get("gitCommitInfo", {})
+
+    def restore_to_new_managed_dataset(self, name, connection, type=None, format=None):
+        """
+        Restores data from this snapshot to a new managed dataset.
+
+        Returns a future to wait for the restore task to complete
+
+        :param str name: name of the dataset to create
+        :param str connection: name of the connection to create the dataset on
+        :param str type: type of dataset, for connections where the type could be ambiguous. Typically,
+            this is SCP or SFTP, for SSH connections
+        :param str format: name of a format preset relevant for the dataset type. Possible values are: CSV_ESCAPING_NOGZIP_FORHIVE,
+            CSV_UNIX_GZIP, CSV_EXCEL_GZIP, CSV_EXCEL_GZIP_BIGQUERY, CSV_NOQUOTING_NOGZIP_FORPIG, PARQUET_HIVE,
+            AVRO, ORC. If None, uses the default
+        :rtype: `dataikuapi.dss.future.DSSFuture`
+        """
+        ch = self.dataset.project.new_managed_dataset_creation_helper(name)
+        ch.with_store_into(connection, type_option_id=type, format_option_id=format)
+
+        future_response = self.dataset.client._perform_json("POST",
+            "/projects/%s/datasets/%s/snapshots/%s/actions/restoreToNew"% (self.dataset.project_key, self.dataset.dataset_name, self.id),
+            body = {"creationSettings" : ch.creation_settings})
+        # NOTE(review): the lambda is presumably used by DSSFuture to turn its result into a handle on the new dataset - confirm
+        return DSSFuture(self.dataset.client, future_response.get('jobId', None), future_response,
+            lambda x: DSSDataset(self.dataset.client, self.dataset.project_key, name))
+
 class DSSDatasetSettings(object):
     def __init__(self, dataset, settings):
         self.dataset = dataset
diff --git a/dataikuapi/dss/project.py b/dataikuapi/dss/project.py
index d342109b..6c499260 100644
--- a/dataikuapi/dss/project.py
+++ b/dataikuapi/dss/project.py
@@ -1113,6 +1113,60 @@ def get_app_manifest(self):
         raw_data = self.client._perform_json("GET", "/projects/%s/app-manifest" % self.project_key)
         return DSSAppManifest(self.client, raw_data)
 
+    def new_snapshot(self, snapshot_id):
+        """
+        Starts the definition of a new data snapshot of this project.
+
+        Add content to the returned builder, then call its start() method
+
+        :param str snapshot_id: id of the snapshot to create
+        :rtype: :class:`SnapshotBuilder`
+        """
+        return SnapshotBuilder(self, snapshot_id)
+
+
+class SnapshotBuilder(object):
+    """
+    Builder for a data snapshot of a project.
+    Do not instantiate this class, use the new_snapshot method of the project
+    """
+    def __init__(self, project, snapshot_id):
+        self.project = project
+        self.project_key = project.project_key
+        self.client = project.client
+        self.snapshot_id = snapshot_id
+        # request body sent by start(); only the datasets list is filled by this class
+        self.settings = {
+            "includedDatasetsData" : [],
+            "includedSavedModels" : [],
+            "includedManagedFolders" : []
+        }
+
+    def add_dataset(self, dataset):
+        """
+        Adds the data of a dataset to the snapshot
+
+        :param dataset: name of the dataset, or a :class:`DSSDataset` handle on it
+        """
+        if isinstance(dataset, DSSDataset):
+            # dataset_name is the attribute DSSDataset methods read the name from
+            dataset = dataset.dataset_name
+        self.settings["includedDatasetsData"].append({"name" : dataset})
+
+    def start(self):
+        """
+        Requests the creation of the snapshot with the content added so far
+
+        Returns a future to wait for the snapshot creation to complete
+
+        :rtype: `dataikuapi.dss.future.DSSFuture`
+        """
+        future_response = self.client._perform_json("PUT",
+            "/projects/%s/snapshots/%s" % (self.project_key, self.snapshot_id),
+            body = self.settings)
+
+        return DSSFuture(self.client, future_response.get('jobId', None), future_response)
+
 
 class TablesImportDefinition(object):
     """