From 4568eb0f8db7b1797862553bab32ed1fc1cea326 Mon Sep 17 00:00:00 2001 From: Shay Elbaz Date: Mon, 4 Mar 2024 16:14:20 +0200 Subject: [PATCH 1/3] support cell-specific language --- CHANGELOG.md | 2 +- sparkmagic/sparkmagic/kernels/kernelmagics.py | 8 ++++++++ sparkmagic/sparkmagic/livyclientlib/command.py | 11 ++++++++++- sparkmagic/sparkmagic/magics/sparkmagicsbase.py | 4 +++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0744a21..83eb590c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## Next Release ### Updates - +- Enable cell-specific language to run against the same Spark Session (for polyglot notebooks). - Enable automated test run as required step for release process - Enable basic DependaBot scanning for pip packages and GitHub actions used in CI - Add CI support for release proces with [release.yaml](.github/workflows/release.yml) workflow diff --git a/sparkmagic/sparkmagic/kernels/kernelmagics.py b/sparkmagic/sparkmagic/kernels/kernelmagics.py index 1269947a..73e4cb64 100644 --- a/sparkmagic/sparkmagic/kernels/kernelmagics.py +++ b/sparkmagic/sparkmagic/kernels/kernelmagics.py @@ -359,6 +359,13 @@ def configure(self, line, cell="", local_ns=None): help="Whether to automatically coerce the types (default, pass True if being explicit) " "of the dataframe or not (pass False)", ) + @argument( + "-l", + "--language", + type=str, + default=None, + help=f"Specific language for the current cell (supported: {','.join(LANGS_SUPPORTED)})", + ) @wrap_unexpected_exceptions @handle_expected_exceptions def spark(self, line, cell="", local_ns=None): @@ -377,6 +384,7 @@ def spark(self, line, cell="", local_ns=None): args.samplefraction, None, coerce, + language=args.language, ) @cell_magic diff --git a/sparkmagic/sparkmagic/livyclientlib/command.py b/sparkmagic/sparkmagic/livyclientlib/command.py index cd4e67cc..62a05764 100644 --- a/sparkmagic/sparkmagic/livyclientlib/command.py +++ b/sparkmagic/sparkmagic/livyclientlib/command.py @@ -36,6 +36,7 @@ def __init__(self, code, spark_events=None): if spark_events is None: spark_events = SparkEvents() self._spark_events = spark_events + self.kind = None def __repr__(self): return "Command({}, ...)".format(repr(self.code)) @@ -46,14 +47,22 @@ def __eq__(self, other): def __ne__(self, other): return not self == other + def set_language(self, lang): + if lang is not None: + self.kind = conf.get_livy_kind(lang) + return self + def execute(self, session): + kind = self.kind or session.kind self._spark_events.emit_statement_execution_start_event( - session.guid, session.kind, session.id, self.guid + session.guid, kind, session.id, self.guid ) statement_id = -1 try: session.wait_for_idle() data = {"code": self.code} + if self.kind: + data["kind"] = kind response = session.http_client.post_statement(session.id, data) statement_id = response["id"] output = self._get_statement_output(session, statement_id) diff --git a/sparkmagic/sparkmagic/magics/sparkmagicsbase.py b/sparkmagic/sparkmagic/magics/sparkmagicsbase.py index 93fcc6f6..5a298d5d 100644 --- a/sparkmagic/sparkmagic/magics/sparkmagicsbase.py +++ b/sparkmagic/sparkmagic/magics/sparkmagicsbase.py @@ -124,6 +124,7 @@ def execute_spark( session_name, coerce, output_handler=None, + language=None, ): output_handler = output_handler or SparkOutputHandler( html=self.ipython_display.html, @@ -131,8 +132,9 @@ def execute_spark( default=self.ipython_display.display, ) + command = Command(cell).set_language(language) (success, out, mimetype) = 
self.spark_controller.run_command( - Command(cell), session_name + command, session_name ) if not success: if conf.shutdown_session_on_spark_statement_errors(): From 839e6749f161a1116f445d9a224ecc595a6e1db4 Mon Sep 17 00:00:00 2001 From: Shay Elbaz Date: Wed, 8 Jan 2025 13:41:59 +0200 Subject: [PATCH 2/3] example notebook --- examples/Multilang Notebook.ipynb | 312 ++++++++++++++++++++++++++++++ 1 file changed, 312 insertions(+) create mode 100644 examples/Multilang Notebook.ipynb diff --git a/examples/Multilang Notebook.ipynb b/examples/Multilang Notebook.ipynb new file mode 100644 index 00000000..b2b4139d --- /dev/null +++ b/examples/Multilang Notebook.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "eb22e89b-bc39-424a-a205-68479ace406c", + "metadata": {}, + "source": [ + "# Mixing Spark and PySpark cells in the same notebooke\n", + "\n", + "Sparkmagic enables the use of Python, Scala, and R code cells within the same Notebook and SparkSession, allowing you to mix UDFs from different languages in a single DataFrame and leverage any Spark library—whether in Python, Scala, or R—in the language of your choice.\n", + "\n", + "**Note:** This notebook illustrates the use of Python and Scala, but the process for R is the same.\n", + "\n", + "**Note:** Remember to specify spark.jars or spark.PyFiles (as needed) when you want to import external packages into Spark." + ] + }, + { + "cell_type": "markdown", + "id": "964d3636-112c-450b-a5e2-b5f95902fe9f", + "metadata": {}, + "source": [ + "### Sharing UDFs\n", + "\n", + "Custom logic, such as UDFs, can be used in any language. This example shows how you can use Python functions in Scala" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "afffbedb-1e2b-4864-95ef-9624ca32ea10", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df: org.apache.spark.sql.Dataset[Long] = [id: bigint]\n" + ] + } + ], + "source": [ + "%%spark -l scala\n", + "\n", + "val df = spark.range(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "49f22be2-3fb0-4328-b4dd-5bb0b962e203", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "" + ] + } + ], + "source": [ + "%%spark -l python\n", + "\n", + "\n", + "def plus_one(x):\n", + " return x + 1\n", + "\n", + "\n", + "def real_world_function(x, y, z):\n", + " # import pandas, networkx, scikit ...\n", + " pass\n", + "\n", + "spark.udf.register(\"plus_one\", plus_one, returnType=\"int\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "59793c3c-14b4-4d55-9b6c-b12a7284ca1b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', 
description='Progress:', layout=Layout(height='25px', width='50%'),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df2: org.apache.spark.sql.DataFrame = [id: bigint, col2: int]\n", + "== Physical Plan ==\n", + "*(2) Project [id#21L, pythonUDF0#29 AS col2#23]\n", + "+- BatchEvalPython [plus_one(id#21L)], [id#21L, pythonUDF0#29]\n", + " +- *(1) Range (0, 10, step=1, splits=4)\n", + "+---+----+\n", + "| id|col2|\n", + "+---+----+\n", + "| 0| 1|\n", + "| 1| 2|\n", + "| 2| 3|\n", + "| 3| 4|\n", + "| 4| 5|\n", + "| 5| 6|\n", + "| 6| 7|\n", + "| 7| 8|\n", + "| 8| 9|\n", + "| 9| 10|\n", + "+---+----+\n", + "\n" + ] + } + ], + "source": [ + "%%spark -l scala\n", + "\n", + "val df2 = df.withColumn(\"col2\", callUDF(\"plus_one\", $\"id\"))\n", + "df2.explain()\n", + "df2.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8f8dd5e9-4d29-437d-90fe-75445cb9072f", + "metadata": {}, + "source": [ + "Logic can also be shared via Views:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "12e93be3-bacb-4b1a-9216-5710ff24c5ce", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%spark -l python\n", + "\n", + "df = spark.range(10)\n", + "df.createTempView(\"for_scala\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "bc06e2e9-315c-4495-908a-9e0c623e17dc", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "otherDF: org.apache.spark.sql.DataFrame = [id: bigint, scala_col: bigint]\n" + ] + } + ], + "source": [ + "%%spark -l scala\n", + "\n", + "val otherDF = spark.range(10).withColumn(\"scala_col\", $\"id\" * 100)\n", + "\n", + "spark.table(\"for_scala\").join(otherDF, Seq(\"id\")).createOrReplaceTempView(\"for_python\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "2ee0abb1-c019-4eb0-af6e-9f9653ec3cf3", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "== Physical Plan ==\n", + "*(2) Project [id#38L, scala_col#75L]\n", + "+- *(2) BroadcastHashJoin [id#38L], [id#73L], Inner, BuildLeft\n", + " :- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false]))\n", + " : +- *(1) Range (0, 10, step=1, splits=4)\n", + " +- *(2) Project [id#73L, (id#73L * 100) AS scala_col#75L]\n", + " +- *(2) Range (0, 10, step=1, splits=4)\n", + "+---+---------+\n", + "| id|scala_col|\n", + "+---+---------+\n", + "| 0| 0|\n", + "| 1| 100|\n", + "| 2| 200|\n", + "| 3| 300|\n", + "| 4| 400|\n", + "| 5| 
500|\n", + "| 6| 600|\n", + "| 7| 700|\n", + "| 8| 800|\n", + "| 9| 900|\n", + "+---+---------+" + ] + } + ], + "source": [ + "%%spark -l python\n", + "\n", + "spark.table(\"for_python\").explain()\n", + "spark.table(\"for_python\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21afe6c9-681c-41c3-801c-0934bf43acac", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Spark", + "language": "scala", + "name": "sparkkernel" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "pygments_lexer": "scala" + } + }, + "nbformat": 4, + "nbformat_minor": 5 + } + \ No newline at end of file From c981174b7670aafc2b587adc2070e992d56cf16a Mon Sep 17 00:00:00 2001 From: Shay Elbaz Date: Wed, 8 Jan 2025 21:02:44 +0200 Subject: [PATCH 3/3] typo --- examples/Multilang Notebook.ipynb | 595 +++++++++++++++--------------- 1 file changed, 297 insertions(+), 298 deletions(-) diff --git a/examples/Multilang Notebook.ipynb b/examples/Multilang Notebook.ipynb index b2b4139d..9dd08445 100644 --- a/examples/Multilang Notebook.ipynb +++ b/examples/Multilang Notebook.ipynb @@ -1,312 +1,311 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "eb22e89b-bc39-424a-a205-68479ace406c", - "metadata": {}, - "source": [ - "# Mixing Spark and PySpark cells in the same notebooke\n", - "\n", - "Sparkmagic enables the use of Python, Scala, and R code cells within the same Notebook and SparkSession, allowing you to mix UDFs from different languages in a single DataFrame and leverage any Spark library—whether in Python, Scala, or R—in the language of your choice.\n", - "\n", - "**Note:** This notebook illustrates the use of Python and Scala, but the process for R is the same.\n", - "\n", - "**Note:** Remember to specify spark.jars or spark.PyFiles (as needed) when you want to import external packages into Spark." + "cells": [ + { + "cell_type": "markdown", + "id": "eb22e89b-bc39-424a-a205-68479ace406c", + "metadata": {}, + "source": [ + "# Mixing Spark and PySpark cells in the same Notebook\n", + "\n", + "Sparkmagic enables the use of Python, Scala, and R code cells within the same Notebook and SparkSession, allowing you to mix UDFs from different languages in a single DataFrame and leverage any Spark library—whether in Python, Scala, or R—in the language of your choice.\n", + "\n", + "**Note:** This notebook illustrates the use of Python and Scala, but the process for R is the same.\n", + "\n", + "**Note:** Remember to specify spark.jars or spark.PyFiles (as needed) when you want to import external packages into Spark." + ] + }, + { + "cell_type": "markdown", + "id": "964d3636-112c-450b-a5e2-b5f95902fe9f", + "metadata": {}, + "source": [ + "### Sharing UDFs\n", + "\n", + "Custom logic, such as UDFs, can be used in any language. 
This example shows how you can use Python functions in Scala" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "afffbedb-1e2b-4864-95ef-9624ca32ea10", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, - { - "cell_type": "markdown", - "id": "964d3636-112c-450b-a5e2-b5f95902fe9f", - "metadata": {}, - "source": [ - "### Sharing UDFs\n", - "\n", - "Custom logic, such as UDFs, can be used in any language. This example shows how you can use Python functions in Scala" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "afffbedb-1e2b-4864-95ef-9624ca32ea10", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df: org.apache.spark.sql.Dataset[Long] = [id: bigint]\n" - ] - } - ], - "source": [ - "%%spark -l scala\n", - "\n", - "val df = spark.range(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "49f22be2-3fb0-4328-b4dd-5bb0b962e203", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "" - ] - } - ], - "source": [ - "%%spark -l python\n", - "\n", - "\n", - "def plus_one(x):\n", - " return x + 1\n", - "\n", - "\n", - "def real_world_function(x, y, z):\n", - " # import pandas, networkx, scikit ...\n", - " pass\n", - "\n", - "spark.udf.register(\"plus_one\", plus_one, returnType=\"int\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "59793c3c-14b4-4d55-9b6c-b12a7284ca1b", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df2: org.apache.spark.sql.DataFrame = [id: bigint, col2: int]\n", - "== Physical Plan ==\n", - "*(2) Project [id#21L, pythonUDF0#29 AS col2#23]\n", - "+- BatchEvalPython [plus_one(id#21L)], [id#21L, pythonUDF0#29]\n", - " +- *(1) Range (0, 10, step=1, splits=4)\n", - "+---+----+\n", - "| id|col2|\n", - "+---+----+\n", - "| 0| 1|\n", - "| 1| 2|\n", - "| 2| 3|\n", - "| 3| 4|\n", - "| 4| 5|\n", - "| 5| 6|\n", - "| 6| 7|\n", - "| 7| 8|\n", - "| 8| 9|\n", - "| 9| 10|\n", - "+---+----+\n", - "\n" - ] - } - ], - "source": [ - "%%spark -l scala\n", - "\n", - "val df2 = df.withColumn(\"col2\", callUDF(\"plus_one\", $\"id\"))\n", - "df2.explain()\n", - "df2.show()" + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "df: org.apache.spark.sql.Dataset[Long] = [id: bigint]\n" + ] + } + ], + "source": [ + "%%spark -l scala\n", + "\n", + "val df = spark.range(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "49f22be2-3fb0-4328-b4dd-5bb0b962e203", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, - { - "cell_type": "markdown", - "id": "8f8dd5e9-4d29-437d-90fe-75445cb9072f", - "metadata": {}, - "source": [ - "Logic can also be shared via Views:" + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "" + ] + } + ], + "source": [ + "%%spark -l python\n", + "\n", + "\n", + "def plus_one(x):\n", + " return x + 1\n", + "\n", + "\n", + "def real_world_function(x, y, z):\n", + " # import pandas, networkx, scikit ...\n", + " pass\n", + "\n", + "spark.udf.register(\"plus_one\", plus_one, returnType=\"int\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "59793c3c-14b4-4d55-9b6c-b12a7284ca1b", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, - { - "cell_type": "code", - "execution_count": 16, - "id": "12e93be3-bacb-4b1a-9216-5710ff24c5ce", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%spark -l python\n", - "\n", - "df = spark.range(10)\n", - "df.createTempView(\"for_scala\")" + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df2: org.apache.spark.sql.DataFrame = [id: bigint, col2: int]\n", + "== Physical Plan ==\n", + "*(2) Project [id#21L, pythonUDF0#29 AS col2#23]\n", + "+- BatchEvalPython [plus_one(id#21L)], [id#21L, pythonUDF0#29]\n", + " +- *(1) Range (0, 10, step=1, splits=4)\n", + "+---+----+\n", + "| id|col2|\n", + "+---+----+\n", + "| 0| 1|\n", + "| 1| 2|\n", + "| 2| 3|\n", + "| 3| 4|\n", + "| 4| 5|\n", + "| 5| 6|\n", + "| 6| 7|\n", + "| 7| 8|\n", + "| 8| 9|\n", + "| 9| 10|\n", + "+---+----+\n", + "\n" + ] + } + ], + "source": [ + "%%spark -l scala\n", + "\n", + "val df2 = df.withColumn(\"col2\", callUDF(\"plus_one\", $\"id\"))\n", + "df2.explain()\n", + "df2.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8f8dd5e9-4d29-437d-90fe-75445cb9072f", + "metadata": {}, + "source": [ + "Logic can also be shared via Views:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "12e93be3-bacb-4b1a-9216-5710ff24c5ce", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, - { - "cell_type": "code", - 
"execution_count": 22, - "id": "bc06e2e9-315c-4495-908a-9e0c623e17dc", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "otherDF: org.apache.spark.sql.DataFrame = [id: bigint, scala_col: bigint]\n" - ] - } - ], - "source": [ - "%%spark -l scala\n", - "\n", - "val otherDF = spark.range(10).withColumn(\"scala_col\", $\"id\" * 100)\n", - "\n", - "spark.table(\"for_scala\").join(otherDF, Seq(\"id\")).createOrReplaceTempView(\"for_python\")" + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%spark -l python\n", + "\n", + "df = spark.range(10)\n", + "df.createTempView(\"for_scala\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "bc06e2e9-315c-4495-908a-9e0c623e17dc", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, - { - "cell_type": "code", - "execution_count": 23, - "id": "2ee0abb1-c019-4eb0-af6e-9f9653ec3cf3", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "== Physical Plan ==\n", - "*(2) Project [id#38L, scala_col#75L]\n", - "+- *(2) BroadcastHashJoin [id#38L], [id#73L], Inner, BuildLeft\n", - " :- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false]))\n", - " : +- *(1) Range (0, 10, step=1, splits=4)\n", - " +- *(2) Project [id#73L, (id#73L * 100) AS scala_col#75L]\n", - " +- *(2) Range (0, 10, step=1, splits=4)\n", - "+---+---------+\n", - "| id|scala_col|\n", - "+---+---------+\n", - "| 0| 0|\n", - "| 1| 100|\n", - "| 2| 200|\n", - "| 3| 300|\n", - "| 4| 400|\n", - "| 5| 500|\n", - "| 6| 600|\n", - "| 7| 700|\n", - "| 8| 800|\n", - "| 9| 900|\n", - "+---+---------+" - ] - } - ], - "source": [ - "%%spark -l python\n", - "\n", - "spark.table(\"for_python\").explain()\n", - "spark.table(\"for_python\").show()" + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "otherDF: org.apache.spark.sql.DataFrame = [id: bigint, scala_col: bigint]\n" + ] + } + ], + "source": [ + "%%spark -l scala\n", + "\n", + "val otherDF = spark.range(10).withColumn(\"scala_col\", $\"id\" * 100)\n", + "\n", + "spark.table(\"for_scala\").join(otherDF, Seq(\"id\")).createOrReplaceTempView(\"for_python\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "2ee0abb1-c019-4eb0-af6e-9f9653ec3cf3", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', 
width='50%'),…" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "21afe6c9-681c-41c3-801c-0934bf43acac", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Spark", - "language": "scala", - "name": "sparkkernel" - }, - "language_info": { - "codemirror_mode": "text/x-scala", - "file_extension": ".sc", - "mimetype": "text/x-scala", - "name": "scala", - "pygments_lexer": "scala" - } + "metadata": {}, + "output_type": "display_data" }, - "nbformat": 4, - "nbformat_minor": 5 - } - \ No newline at end of file + { + "name": "stdout", + "output_type": "stream", + "text": [ + "== Physical Plan ==\n", + "*(2) Project [id#38L, scala_col#75L]\n", + "+- *(2) BroadcastHashJoin [id#38L], [id#73L], Inner, BuildLeft\n", + " :- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false]))\n", + " : +- *(1) Range (0, 10, step=1, splits=4)\n", + " +- *(2) Project [id#73L, (id#73L * 100) AS scala_col#75L]\n", + " +- *(2) Range (0, 10, step=1, splits=4)\n", + "+---+---------+\n", + "| id|scala_col|\n", + "+---+---------+\n", + "| 0| 0|\n", + "| 1| 100|\n", + "| 2| 200|\n", + "| 3| 300|\n", + "| 4| 400|\n", + "| 5| 500|\n", + "| 6| 600|\n", + "| 7| 700|\n", + "| 8| 800|\n", + "| 9| 900|\n", + "+---+---------+" + ] + } + ], + "source": [ + "%%spark -l python\n", + "\n", + "spark.table(\"for_python\").explain()\n", + "spark.table(\"for_python\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21afe6c9-681c-41c3-801c-0934bf43acac", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Spark", + "language": "scala", + "name": "sparkkernel" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "pygments_lexer": "scala" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}