
Commit 1ee4ede

Add acceptance tests
1 parent 2bcd816 commit 1ee4ede

28 files changed, +600 -0 lines changed
@@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
@@ -0,0 +1,7 @@
{
  "recommendations": [
    "databricks.databricks",
    "ms-python.vscode-pylance",
    "redhat.vscode-yaml"
  ]
}
@@ -0,0 +1,21 @@
{
  "python.analysis.stubPath": ".vscode",
  "databricks.python.envFile": "${workspaceFolder}/.env",
  "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
  "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
  "python.testing.pytestArgs": [
    "."
  ],
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true,
  "python.analysis.extraPaths": ["assets/etl_pipeline"],
  "files.exclude": {
    "**/*.egg-info": true,
    "**/__pycache__": true,
    ".pytest_cache": true,
  },
  "[python]": {
    "editor.defaultFormatter": "ms-python.black-formatter",
    "editor.formatOnSave": true,
  },
}
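The `jupyter.interactiveWindow.cellMarker.codeRegex` setting above lets VS Code's interactive window split a plain Databricks notebook source file into cells. For illustration only (this file is not part of the commit), a minimal sketch of a source file that the regex would recognize:

```
# Databricks notebook source
# Hypothetical example: each "# COMMAND ----------" line starts a new cell in VS Code.

print("first cell")

# COMMAND ----------

print("second cell")
```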
@@ -0,0 +1,41 @@
# my_lakeflow_pipelines

The 'my_lakeflow_pipelines' project was generated by using the Lakeflow template.

## Setup

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspace, if you have not done so already:
   ```
   $ databricks auth login
   ```

3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html, or the PyCharm plugin from
   https://www.databricks.com/blog/announcing-pycharm-integration-databricks.


## Deploying resources

1. To deploy a development copy of this project, type:
   ```
   $ databricks bundle deploy --target dev
   ```
   (Note that "dev" is the default target, so the `--target` parameter
   is optional here.)

2. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

3. Use the "summary" command to review everything that was deployed:
   ```
   $ databricks bundle summary
   ```

4. To run a job or pipeline, use the "run" command:
   ```
   $ databricks bundle run
   ```
@@ -0,0 +1,49 @@
# This is a Databricks asset bundle definition for my_lakeflow_pipelines.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_lakeflow_pipelines
  uuid: [UUID]

include:
  - resources/*.yml
  - resources/*/*.yml

# Variable declarations. These variables are assigned in the dev/prod targets below.
variables:
  catalog:
    description: The catalog to use
  schema:
    description: The schema to use
  notifications:
    description: The email addresses to use for failure notifications

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: [DATABRICKS_URL]
    variables:
      catalog: main
      schema: ${workspace.current_user.short_name}
      notifications: []

  prod:
    mode: production
    workspace:
      host: [DATABRICKS_URL]
      # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy.
      root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - user_name: [USERNAME]
        level: CAN_MANAGE
    run_as:
      user_name: [USERNAME]
    variables:
      catalog: main
      schema: default
      notifications: [[USERNAME]]
@@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
**/explorations/**
**/!explorations/README.md
@@ -0,0 +1,22 @@
# my_lakeflow_pipelines_pipeline

This folder defines all source code for the my_lakeflow_pipelines_pipeline pipeline:

- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline.
- `transformations`: All dataset definitions and transformations.
- `utilities`: Utility functions and Python modules used in this pipeline.
- `data_sources` (optional): View definitions describing the source data for this pipeline.

## Getting Started

To get started, go to the `transformations` folder -- most of the relevant source code lives there:

* By convention, every dataset under `transformations` is in a separate file.
* Take a look at the sample under "sample_trips_my_lakeflow_pipelines.py" to get familiar with the syntax.
  Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html.
* Use `Run file` to run and preview a single transformation.
* Use `Run pipeline` to run _all_ transformations in the entire pipeline.
* Use `+ Add` in the file browser to add a new dataset definition.
* Use `Schedule` to run the pipeline on a schedule!

For more tutorials and reference material, see https://docs.databricks.com/dlt.
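Following the one-dataset-per-file convention described in this README, an additional transformation file could look like the sketch below. The dataset name, source table, and filter are illustrative assumptions, not part of this commit:

```
import dlt
from pyspark.sql.functions import col


# Hypothetical extra dataset definition under `transformations/`.
@dlt.table
def filtered_trips_example():
    # `spark` is provided by the pipeline runtime, as in the sample transformations.
    return (
        spark.read.table("samples.nyctaxi.trips")
        .where(col("trip_distance") > 0)
    )
```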
@@ -0,0 +1,63 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "source": [
    "### Example Exploratory Notebook\n",
    "\n",
    "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n",
    "\n",
    "**Note**: This notebook is not executed as part of the pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n",
    "\n",
    "display(spark.sql(\"SELECT * FROM main.[USERNAME].my_lakeflow_pipelines\"))"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "computePreferences": null,
   "dashboards": [],
   "environmentMetadata": null,
   "inputWidgetPreferences": null,
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "sample_exploration",
   "widgets": {}
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1,19 @@
# The job that triggers my_lakeflow_pipelines_pipeline.
resources:
  jobs:
    my_lakeflow_pipelines_job:
      name: my_lakeflow_pipelines_job

      trigger:
        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
        periodic:
          interval: 1
          unit: DAYS

      email_notifications:
        on_failure: ${var.notifications}

      tasks:
        - task_key: refresh_pipeline
          pipeline_task:
            pipeline_id: ${resources.pipelines.my_lakeflow_pipelines_pipeline.id}
@@ -0,0 +1,14 @@
resources:
  pipelines:
    my_lakeflow_pipelines_pipeline:
      name: my_lakeflow_pipelines_pipeline
      serverless: true
      continuous: false
      channel: "PREVIEW"
      photon: true
      catalog: ${var.catalog}
      schema: ${var.schema}
      root_path: "."
      libraries:
        - glob:
            include: transformations/**
@@ -0,0 +1,16 @@
import dlt
from pyspark.sql.functions import col
from utilities import utils


# This file defines a sample transformation.
# Edit the sample below or add new transformations
# using "+ Add" in the file browser.


@dlt.table
def sample_trips_my_lakeflow_pipelines():
    return (
        spark.read.table("samples.nyctaxi.trips")
        .withColumn("trip_distance_km", utils.distance_km(col("trip_distance")))
    )
@@ -0,0 +1,19 @@
import dlt
from pyspark.sql.functions import col, sum


# This file defines a sample transformation.
# Edit the sample below or add new transformations
# using "+ Add" in the file browser.


@dlt.table
def sample_zones_my_lakeflow_pipelines():
    # Read from the "sample_trips" table, then sum all the fares
    return (
        spark.read.table("sample_trips_my_lakeflow_pipelines")
        .groupBy(col("pickup_zip"))
        .agg(
            sum("fare_amount").alias("total_fare")
        )
    )
@@ -0,0 +1,8 @@
from pyspark.sql.functions import udf
from pyspark.sql.types import FloatType


@udf(returnType=FloatType())
def distance_km(distance_miles):
    """Convert distance from miles to kilometers (1 mile = 1.60934 km)."""
    return distance_miles * 1.60934
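Since the VS Code settings in this commit enable pytest, a unit test for this helper could look like the following sketch. The test function name, the local SparkSession setup, and the import path (assuming the pipeline root is on the Python path) are assumptions, not part of this commit:

```
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

from utilities import utils


def test_distance_km_converts_miles_to_km():
    # Hypothetical test: run the UDF through a local Spark session and
    # check the miles-to-kilometers conversion within float precision.
    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([(1.0,), (10.0,)], ["trip_distance"])
    rows = df.select(utils.distance_km(col("trip_distance")).alias("km")).collect()
    assert abs(rows[0]["km"] - 1.60934) < 1e-4
    assert abs(rows[1]["km"] - 16.0934) < 1e-4
```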
@@ -0,0 +1,6 @@
{
  "project_name": "my_lakeflow_pipelines",
  "default_catalog": "main",
  "personal_schemas": "yes",
  "language": "sql"
}
@@ -0,0 +1,29 @@

>>> [CLI] bundle init lakeflow-pipelines --config-file ./input.json --output-dir output

Welcome to the template for Lakeflow Declarative Pipelines!


Your new project has been created in the 'my_lakeflow_pipelines' directory!

Refer to the README.md file for "getting started" instructions!

>>> [CLI] bundle validate -t dev
Name: my_lakeflow_pipelines
Target: dev
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_lakeflow_pipelines/dev

Validation OK!

>>> [CLI] bundle validate -t prod
Name: my_lakeflow_pipelines
Target: prod
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_lakeflow_pipelines/prod

Validation OK!
@@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
@@ -0,0 +1,7 @@
{
  "recommendations": [
    "databricks.databricks",
    "ms-python.vscode-pylance",
    "redhat.vscode-yaml"
  ]
}
@@ -0,0 +1,21 @@
{
  "python.analysis.stubPath": ".vscode",
  "databricks.python.envFile": "${workspaceFolder}/.env",
  "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
  "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
  "python.testing.pytestArgs": [
    "."
  ],
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true,
  "python.analysis.extraPaths": ["assets/etl_pipeline"],
  "files.exclude": {
    "**/*.egg-info": true,
    "**/__pycache__": true,
    ".pytest_cache": true,
  },
  "[python]": {
    "editor.defaultFormatter": "ms-python.black-formatter",
    "editor.formatOnSave": true,
  },
}
