
ML models cloud examples #41

Open · wants to merge 3 commits into base: master
378 changes: 378 additions & 0 deletions machine_learning/models/pytorch_tiledb_cloud_ml_model_array.ipynb
@@ -0,0 +1,378 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"In this notebook, we show how we can train a model with PyTorch and save it as a TileDB array on TileDB-Cloud.\n",
"Firstly, let's import what we need and define some variables needed for training a model."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"import tiledb.cloud\n",
"import os\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torchvision\n",
"\n",
"from tiledb.ml.models.pytorch import PyTorchTileDBModel\n",
"\n",
"epochs = 1\n",
"batch_size_train = 128\n",
"batch_size_test = 1000\n",
"learning_rate = 0.01\n",
"momentum = 0.5\n",
"log_interval = 10\n",
"\n",
"# Set random seeds for anything using random number generation\n",
"random_seed = 1\n",
"\n",
"# Disable nondeterministic algorithms\n",
"torch.backends.cudnn.enabled = False\n",
"torch.manual_seed(random_seed)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"We then have to export and load our TileDB-Cloud credentials. For TileDB cloud you can also use a token.\n",
"You have to also set up your AWS credentials on your TileDB-Cloud account."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# This is also our namespace on TileDB-Cloud.\n",
"TILEDB_USER_NAME = os.environ.get('TILEDB_USER_NAME')\n",
"TILEDB_PASSWD = os.environ.get('TILEDB_PASSWD')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"We then create a TileDB-Cloud context and set up our communication with TileDB-Cloud."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"ctx = tiledb.cloud.Ctx()\n",
"tiledb.cloud.login(username=TILEDB_USER_NAME, password=TILEDB_PASSWD)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
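{
"cell_type": "markdown",
"source": [
"As mentioned above, instead of a username and password you can also authenticate with a TileDB-Cloud API token. The cell below is an optional sketch; the TILEDB_REST_TOKEN environment variable name is just an example and is not set elsewhere in this notebook."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Optional: authenticate with a TileDB-Cloud API token instead of username/password.\n",
"# The environment variable name below is just an example.\n",
"token = os.environ.get('TILEDB_REST_TOKEN')\n",
"if token:\n",
"    tiledb.cloud.login(token=token)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},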
{
"cell_type": "markdown",
"source": [
"We will also need the DataLoaders API for the dataset. We will also employ TorchVision which let's as load the MNIST\n",
"dataset in a handy way. We'll use a batch_size of 64 for training while the values 0.1307 and 0.3081 used for\n",
"the Normalize() transformation below are the global mean and standard deviation of the MNIST dataset,\n",
"we'll take them as a given here."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"import logging\n",
"logging.getLogger(\"lightning\").setLevel(logging.ERROR)\n",
"\n",
"train_loader = torch.utils.data.DataLoader(\n",
" torchvision.datasets.MNIST('', train=True, download=True,\n",
" transform=torchvision.transforms.Compose([\n",
" torchvision.transforms.ToTensor(),\n",
" torchvision.transforms.Normalize(\n",
" (0.1307,), (0.3081,))\n",
" ])),\n",
" batch_size=batch_size_train, shuffle=True)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
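{
"cell_type": "markdown",
"source": [
"The notebook also defines batch_size_test, although only the training loader is needed later. As an optional sketch, a matching test loader can be built the same way by passing train=False; it is not used in the rest of this notebook."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Optional sketch: a DataLoader for the MNIST test split (not used later in this notebook).\n",
"test_loader = torch.utils.data.DataLoader(\n",
"    torchvision.datasets.MNIST('', train=False, download=True,\n",
"                               transform=torchvision.transforms.Compose([\n",
"                                   torchvision.transforms.ToTensor(),\n",
"                                   torchvision.transforms.Normalize(\n",
"                                       (0.1307,), (0.3081,))\n",
"                               ])),\n",
"    batch_size=batch_size_test, shuffle=False)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},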
{
"cell_type": "markdown",
"source": [
"Moving on, we build our network. We'll use two 2-D convolutional layers followed by two fully-connected\n",
"layers. As activation function we'll choose ReLUs and as a means of regularization we'll use two dropout layers."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"class Net(nn.Module):\n",
" def __init__(self):\n",
" super(Net, self).__init__()\n",
" self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n",
" self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n",
" self.conv2_drop = nn.Dropout2d()\n",
" self.fc1 = nn.Linear(320, 50)\n",
" self.fc2 = nn.Linear(50, 10)\n",
"\n",
" def forward(self, x):\n",
" x = F.relu(F.max_pool2d(self.conv1(x), 2))\n",
" x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n",
" x = x.view(-1, 320)\n",
" x = F.relu(self.fc1(x))\n",
" x = F.dropout(x, training=self.training)\n",
" x = self.fc2(x)\n",
" return F.log_softmax(x, dim = 1)\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
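{
"cell_type": "markdown",
"source": [
"As an optional sanity check, we can run a dummy batch containing a single 28x28 image through an untrained Net: after the two convolution/pooling stages, the 320 flattened features feed the fully-connected layers, so the output should be log-probabilities over the 10 digit classes."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Optional sanity check: one dummy 28x28 image -> log-probabilities over the 10 classes.\n",
"dummy_input = torch.zeros(1, 1, 28, 28)\n",
"print(Net()(dummy_input).shape)  # expected: torch.Size([1, 10])"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},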
{
"cell_type": "markdown",
"source": [
"We will now initialise our Neural Network and optimizer."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"model = Net()\n",
"optimizer = optim.SGD(model.parameters(), lr=learning_rate,\n",
" momentum=momentum)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"We continue with the training loop and we iterate over all training data once per epoch. Loading the individual batches\n",
"is handled by the DataLoader. We need to set the gradients to zero using optimizer.zero_grad() since PyTorch by default\n",
"accumulates gradients. We then produce the output of the network (forward pass) and compute a negative log-likelihodd\n",
"loss between the output and the ground truth label. The backward() call we now collect a new set of gradients which we\n",
"propagate back into each of the network's parameters using optimizer.step()."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"train_losses = []\n",
"train_counter = []\n",
"\n",
"def train(epoch):\n",
" model.train()\n",
" for batch_idx, (data, target) in enumerate(train_loader):\n",
" optimizer.zero_grad()\n",
" output = model(data)\n",
" loss = F.nll_loss(output, target)\n",
" loss.backward()\n",
" optimizer.step()\n",
" if batch_idx % log_interval == 0:\n",
" print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
" epoch, batch_idx * len(data), len(train_loader.dataset),\n",
" 100. * batch_idx / len(train_loader), loss.item()))\n",
" train_losses.append(loss.item())\n",
" train_counter.append(\n",
" (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))\n",
"\n",
"for epoch in range(1, epochs + 1):\n",
" train(epoch)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
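{
"cell_type": "markdown",
"source": [
"Optionally, we can plot the recorded training losses (train_losses) against the number of training examples seen (train_counter). This sketch assumes matplotlib is available in the environment."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Optional: visualize the training loss curve (assumes matplotlib is installed).\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.plot(train_counter, train_losses, color='blue')\n",
"plt.xlabel('number of training examples seen')\n",
"plt.ylabel('negative log-likelihood loss')\n",
"plt.show()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},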
{
"cell_type": "markdown",
"source": [
"We can move on by defining a TileDBPyTorch model and use model save functionality in order to save it directly to\n",
"our bucket on S3 (defined with AWS credentials in your TileDB-Cloud account) and register it on TileDB-Cloud."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Define array model uri.\n",
"uri = \"tiledb-pytorch-model\"\n",
"\n",
"print('Defining PyTorchTileDBModel model...')\n",
"# In order to save our model on S3 and register it on TileDB-Cloud we have to pass our Namespace and TileDB Context.\n",
"tiledb_model = PyTorchTileDBModel(uri=uri, namespace=TILEDB_USER_NAME, ctx=ctx, model=model)\n",
"\n",
"# We will need the uri that was created from our model class\n",
"# (and follows pattern tiledb://my_username/s3://my_bucket/my_array),\n",
"# in order to interact with our model on TileDB-Cloud.\n",
"tiledb_cloud_model_uri = tiledb_model.uri\n",
"\n",
"print('Saving model on S3 and registering on TileDB-Cloud...')\n",
"tiledb_model.save(meta={'epochs': epochs,\n",
" 'train_loss': train_losses})\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"Finally, we can use TileDB-Cloud API as described in our [cloud documentation](https://docs.tiledb.com/cloud/), in order\n",
"to list our models, get information and deregister them."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# List all our models. Here, we filter with file_type = 'ml_model'. All machine learning model TileDB arrays are of type\n",
"# 'ml_model'\n",
"print(\n",
"tiledb.cloud.client.list_arrays(\n",
" file_type=['ml_model'],\n",
" namespace=TILEDB_USER_NAME))\n",
"\n",
"# Get model's info\n",
"print(tiledb.cloud.array.info(tiledb_cloud_model_uri))\n",
"\n",
"# Load our model for inference\n",
"# Place holder for the loaded model\n",
"loaded_model = Net()\n",
"loaded_optimizer = optim.SGD(model.parameters(), lr=learning_rate,\n",
" momentum=momentum)\n",
"\n",
"PyTorchTileDBModel(uri=tiledb_cloud_model_uri, ctx=ctx).load(model=loaded_model, optimizer=loaded_optimizer)\n",
"\n",
"\n",
"# Check model parameters\n",
"for key_item_1, key_item_2 in zip(\n",
" model.state_dict().items(), loaded_model.state_dict().items()\n",
"):\n",
" print(torch.equal(key_item_1[1], key_item_2[1]))\n",
"\n",
"# Check optimizer parameters\n",
"for key_item_1, key_item_2 in zip(\n",
" optimizer.state_dict().items(), loaded_optimizer.state_dict().items()\n",
"):\n",
" print(all([a == b for a, b in zip(key_item_1[1], key_item_2[1])]))\n",
"\n",
"# Deregister model\n",
"tiledb.cloud.deregister_array(tiledb_cloud_model_uri)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}