
Commit d49e88c

Migrate Databricks account to Azure Databricks (#82)
* update azure databricks details
1 parent 8b27e03 · commit d49e88c

7 files changed: 23 additions & 61 deletions

.github/workflows/ci.yml

Lines changed: 3 additions & 1 deletion

@@ -172,7 +172,9 @@ jobs:
           path: ./.coverage
         env:
           DATABRICKS_CONN_TOKEN: ${{ secrets.DATABRICKS_CONN_TOKEN }}
-          DATABRICKS_CONN_HOST: https://dbc-9c390870-65ef.cloud.databricks.com/
+          DATABRICKS_CONN_HOST: ${{ secrets.DATABRICKS_CONN_HOST }}
+          DATABRICKS_CONN: ${{ secrets.AIRFLOW_CONN_DATABRICKS_DEFAULT }}
+
 
   Code-Coverage:
     if: github.event.action != 'labeled'
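The hard-coded AWS workspace URL is replaced by repository secrets, so the workspace can change without touching the workflow. As a rough sketch of how the test environment might consume these variables (the connection id is a placeholder, and this assumes the usual Airflow convention of carrying the token in the password field):

import os

from airflow.models.connection import Connection

# Build a Databricks connection from the two plain env vars set above.
conn = Connection(
    conn_id="databricks_conn",  # placeholder id
    conn_type="databricks",
    host=os.environ["DATABRICKS_CONN_HOST"],       # now an Azure workspace URL
    password=os.environ["DATABRICKS_CONN_TOKEN"],  # personal access token
)

Airflow also resolves environment variables named AIRFLOW_CONN_<CONN_ID> as complete connection URIs, which is what the AIRFLOW_CONN_DATABRICKS_DEFAULT secret holds.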

dev/dags/basic_notebooks.py

Lines changed: 1 addition & 8 deletions

@@ -10,14 +10,7 @@
         "new_cluster": {
             "cluster_name": "",
             "spark_version": "11.3.x-scala2.12",
-            "aws_attributes": {
-                "first_on_demand": 1,
-                "availability": "SPOT_WITH_FALLBACK",
-                "zone_id": "us-east-2b",
-                "spot_bid_price_percent": 100,
-                "ebs_volume_count": 0,
-            },
-            "node_type_id": "i3.xlarge",
+            "node_type_id": "Standard_DS3_v2",
             "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
             "enable_elastic_disk": False,
             "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",

dev/dags/common_operator.py

Lines changed: 2 additions & 9 deletions

@@ -1,8 +1,8 @@
 from datetime import datetime
 
 from airflow.decorators import dag
-from astro_databricks.operators.notebook import DatabricksNotebookOperator
 from astro_databricks.operators.common import DatabricksTaskOperator
+from astro_databricks.operators.notebook import DatabricksNotebookOperator
 from astro_databricks.operators.workflow import DatabricksWorkflowTaskGroup
 
 job_clusters = [
@@ -11,14 +11,7 @@
         "new_cluster": {
             "cluster_name": "",
             "spark_version": "11.3.x-scala2.12",
-            "aws_attributes": {
-                "first_on_demand": 1,
-                "availability": "SPOT_WITH_FALLBACK",
-                "zone_id": "us-east-2b",
-                "spot_bid_price_percent": 100,
-                "ebs_volume_count": 0,
-            },
-            "node_type_id": "i3.xlarge",
+            "node_type_id": "Standard_DS3_v2",
             "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
             "enable_elastic_disk": False,
             "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",

dev/dags/task_group_example.py

Lines changed: 1 addition & 8 deletions

@@ -10,14 +10,7 @@
         "new_cluster": {
             "cluster_name": "",
             "spark_version": "11.3.x-scala2.12",
-            "aws_attributes": {
-                "first_on_demand": 1,
-                "availability": "SPOT_WITH_FALLBACK",
-                "zone_id": "us-east-2b",
-                "spot_bid_price_percent": 100,
-                "ebs_volume_count": 0,
-            },
-            "node_type_id": "i3.xlarge",
+            "node_type_id": "Standard_DS3_v2",
             "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
             "enable_elastic_disk": False,
             "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",

example_dags/example_databricks_notebook.py

Lines changed: 1 addition & 8 deletions

@@ -18,14 +18,7 @@
 NEW_CLUSTER_SPEC = {
     "cluster_name": "",
     "spark_version": "11.3.x-scala2.12",
-    "aws_attributes": {
-        "first_on_demand": 1,
-        "availability": "SPOT_WITH_FALLBACK",
-        "zone_id": "us-east-2b",
-        "spot_bid_price_percent": 100,
-        "ebs_volume_count": 0,
-    },
-    "node_type_id": "i3.xlarge",
+    "node_type_id": "Standard_DS3_v2",
     "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
     "enable_elastic_disk": False,
     "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",

example_dags/example_databricks_workflow.py

Lines changed: 10 additions & 15 deletions

@@ -15,11 +15,12 @@
 }
 
 DATABRICKS_CONN_ID = os.getenv("ASTRO_DATABRICKS_CONN_ID", "databricks_conn")
-DATABRICKS_NOTIFICATION_EMAIL = os.getenv(
-    "ASTRO_DATABRICKS_NOTIFICATION_EMAIL", "tatiana.alchueyr@astronomer.io"
-)
+
+# DATABRICKS_NOTIFICATION_EMAIL = os.getenv(
+#     "ASTRO_DATABRICKS_NOTIFICATION_EMAIL", "tatiana.alchueyr@astronomer.io"
+# )
 DATABRICKS_DESTINATION_ID = os.getenv(
-    "ASTRO_DATABRICKS_DESTINATION_ID", "b0aea8ab-ea8c-4a45-a2e9-9a26753fd702"
+    "ASTRO_DATABRICKS_DESTINATION_ID", "48c7315c-1d65-4ee3-b7d3-1692e8e8012d"
 )
 
 USER = os.environ.get("USER")
@@ -32,14 +33,7 @@
     "new_cluster": {
         "cluster_name": "",
         "spark_version": "11.3.x-scala2.12",
-        "aws_attributes": {
-            "first_on_demand": 1,
-            "availability": "SPOT_WITH_FALLBACK",
-            "zone_id": "us-east-2b",
-            "spot_bid_price_percent": 100,
-            "ebs_volume_count": 0,
-        },
-        "node_type_id": "i3.xlarge",
+        "node_type_id": "Standard_DS3_v2",
         "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
         "enable_elastic_disk": False,
         "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
@@ -72,9 +66,10 @@
         },
     ],
     extra_job_params={
-        "email_notifications": {
-            "on_start": [DATABRICKS_NOTIFICATION_EMAIL],
-        },
+        ## Commented below to avoid spam; keeping this for example purposes.
+        # "email_notifications": {
+        #     "on_start": [DATABRICKS_NOTIFICATION_EMAIL],
+        # },
         "webhook_notifications": {
            "on_start": [{"id": DATABRICKS_DESTINATION_ID}],
         },
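On-start e-mails give way to a webhook notification destination, referenced by the UUID stored in DATABRICKS_DESTINATION_ID (destinations are created in the Databricks workspace settings). A hedged sketch of how these params attach to the workflow task group (the group id and cluster variable name are placeholders):

workflow = DatabricksWorkflowTaskGroup(
    group_id="example_databricks_workflow",  # placeholder
    databricks_conn_id=DATABRICKS_CONN_ID,
    job_clusters=job_cluster_spec,  # list holding the Azure new_cluster above
    extra_job_params={
        "webhook_notifications": {
            "on_start": [{"id": DATABRICKS_DESTINATION_ID}],
        },
    },
)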

example_dags/example_task_group.py

Lines changed: 5 additions & 12 deletions

@@ -1,13 +1,11 @@
 import os
 from datetime import datetime
 
-from airflow.decorators import dag, task_group
+from airflow.decorators import dag
 from airflow.utils.task_group import TaskGroup
-
 from astro_databricks.operators.notebook import DatabricksNotebookOperator
 from astro_databricks.operators.workflow import DatabricksWorkflowTaskGroup
 
-
 DATABRICKS_CONN = "databricks_conn"
 USER = os.environ.get("USER")
 GROUP_ID = os.getenv("DATABRICKS_GROUP_ID", "1234").replace(".", "_")
@@ -18,14 +16,7 @@
     "new_cluster": {
         "cluster_name": "",
         "spark_version": "11.3.x-scala2.12",
-        "aws_attributes": {
-            "first_on_demand": 1,
-            "availability": "SPOT_WITH_FALLBACK",
-            "zone_id": "us-east-2b",
-            "spot_bid_price_percent": 100,
-            "ebs_volume_count": 0,
-        },
-        "node_type_id": "i3.xlarge",
+        "node_type_id": "Standard_DS3_v2",
         "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
         "enable_elastic_disk": False,
         "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
@@ -40,7 +31,9 @@
     schedule_interval="@daily",
     start_date=datetime(2021, 1, 1),
     catchup=False,
-    default_args={'retries': 0},  # Users are encouraged to use the repair feature, retries may fail
+    default_args={
+        "retries": 0
+    },  # Users are encouraged to use the repair feature, retries may fail
     tags=["astro-provider-databricks"],
 )
 def example_task_group():
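The now-unused task_group decorator import is dropped, and retries are pinned to 0 because a failed Databricks workflow is meant to be fixed with the provider's repair feature rather than rerun from scratch. A hedged sketch of the pattern this DAG demonstrates, nesting a plain Airflow TaskGroup inside the Databricks workflow (notebook path and job cluster key are placeholders):

with DatabricksWorkflowTaskGroup(
    group_id=f"test_workflow_{USER}_{GROUP_ID}",
    databricks_conn_id=DATABRICKS_CONN,
    job_clusters=job_cluster_spec,  # placeholder name for the cluster list
) as workflow:
    with TaskGroup(group_id="inner_group"):
        DatabricksNotebookOperator(
            task_id="notebook",
            databricks_conn_id=DATABRICKS_CONN,
            notebook_path="/Shared/example_notebook",  # placeholder
            source="WORKSPACE",
            job_cluster_key="astro_databricks",  # placeholder key
        )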
