diff --git a/airflow3_bdit_dag_utils b/airflow3_bdit_dag_utils index 50e8be577..e314e55ce 160000 --- a/airflow3_bdit_dag_utils +++ b/airflow3_bdit_dag_utils @@ -1 +1 @@ -Subproject commit 50e8be5770d39ee278caea772c8eee5a594b79a0 +Subproject commit e314e55ce868c2bb5fae6f65788ce649daddad71 diff --git a/bdit_dag_utils b/bdit_dag_utils index 334cba40f..4318b7574 160000 --- a/bdit_dag_utils +++ b/bdit_dag_utils @@ -1 +1 @@ -Subproject commit 334cba40f9a789a50cc910197a44c048193c7f37 +Subproject commit 4318b75747e9a196a284b2888715ada5edbc184f diff --git a/constraints.txt b/constraints.txt index ab770e895..1d8e8459b 100644 --- a/constraints.txt +++ b/constraints.txt @@ -1,193 +1,130 @@ # -# This constraints file was automatically generated on 2024-09-16T16:21:46.279414 -# via "eager-upgrade" mechanism of PIP. For the "v2-10-test" branch of Airflow. +# This constraints file was automatically generated on 2025-06-03T13:56:17.557640 +# via `uv sync --resolution highest` for the "v3-0-test" branch of Airflow. # This variant of constraints install just the 'bare' 'apache-airflow' package build from the HEAD of # the branch, without installing any of the providers. # # Those constraints represent the "newest" dependencies airflow could use, if providers did not limit # Airflow in any way. # -Authlib==1.3.2 -ConfigUpdater==3.2 -Deprecated==1.2.14 -Flask-AppBuilder==4.5.0 -Flask-Babel==2.0.0 -Flask-Bcrypt==1.0.1 -Flask-Caching==2.3.0 -Flask-JWT-Extended==4.6.0 -Flask-Limiter==3.8.0 -Flask-Login==0.6.3 -Flask-SQLAlchemy==2.5.1 -Flask-Session==0.5.0 -Flask-WTF==1.2.1 +Deprecated==1.2.18 Flask==2.2.5 -Jinja2==3.1.4 -Mako==1.3.5 -MarkupSafe==2.1.5 -PyJWT==2.9.0 +Jinja2==3.1.6 +Mako==1.3.10 +MarkupSafe==3.0.2 +PyJWT==2.10.1 PyYAML==6.0.2 -Pygments==2.18.0 +Pygments==2.19.1 SQLAlchemy-JSONField==1.0.2 SQLAlchemy-Utils==0.41.2 SQLAlchemy==1.4.54 -WTForms==3.1.2 Werkzeug==2.2.3 -aiobotocore==2.15.0 -aiohappyeyeballs==2.4.0 -aiohttp==3.10.5 -aioitertools==0.12.0 -aiosignal==1.3.1 -alembic==1.13.2 -amqp==5.2.0 +a2wsgi==1.10.8 +aiologic==0.14.0 +aiosqlite==0.21.0 +alembic==1.16.1 annotated-types==0.7.0 -anyio==4.4.0 -apispec==6.6.1 -argcomplete==3.5.0 +anyio==4.9.0 +argcomplete==3.6.2 asgiref==3.8.1 -async-timeout==4.0.3 -atlasclient==1.0.0 -attrs==24.2.0 -babel==2.16.0 -backoff==2.2.1 -bcrypt==4.2.0 -blinker==1.8.2 -botocore==1.35.16 -cachelib==0.9.0 -certifi==2024.8.30 +attrs==25.3.0 +backports.strenum==1.3.1 +cadwyn==5.3.3 +certifi==2025.4.26 cffi==1.17.1 -cgroupspy==0.2.3 -charset-normalizer==3.3.2 -click==8.1.7 -clickclick==20.10.2 -cloudpickle==3.0.0 -colorama==0.4.6 -colorlog==6.8.2 -connexion==2.14.2 +charset-normalizer==3.4.2 +click==8.1.8 +colorlog==6.9.0 cron-descriptor==1.4.5 -croniter==3.0.3 -cryptography==43.0.1 -decorator==5.1.1 -dill==0.3.8 -distlib==0.3.8 -dnspython==2.6.1 -docopt==0.6.2 -docutils==0.21.2 +croniter==6.0.0 +cryptography==42.0.8 +dill==0.3.1.1 +dnspython==2.7.0 email_validator==2.2.0 -eventlet==0.37.0 -exceptiongroup==1.2.2 -fastavro==1.9.7 -filelock==3.16.0 -frozenlist==1.4.1 -fsspec==2024.9.0 -gevent==24.2.1 -google-re2==1.1.20240702 -googleapis-common-protos==1.65.0 -graphviz==0.20.3 -greenlet==3.1.0 -grpcio==1.66.1 -gssapi==1.8.3 +exceptiongroup==1.3.0 +fastapi-cli==0.0.7 +fastapi==0.115.12 +fsspec==2025.5.1 +googleapis-common-protos==1.70.0 +greenlet==3.2.2 +grpcio==1.72.1 gunicorn==23.0.0 -h11==0.14.0 -hdfs==2.7.3 -httpcore==1.0.5 -httpx==0.27.2 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.6.4 +httpx==0.27.0 idna==3.10 -importlib_metadata==8.5.0 -importlib_resources==6.4.5 
-inflection==0.5.1 -isodate==0.6.1 +importlib_metadata==8.4.0 itsdangerous==2.2.0 -jmespath==1.0.1 -jsonschema-specifications==2023.12.1 -jsonschema==4.23.0 -krb5==0.6.0 -lazy-object-proxy==1.10.0 -ldap3==2.9.1 -limits==3.13.0 +jsonschema-specifications==2025.4.1 +jsonschema==4.24.0 +lazy-object-proxy==1.11.0 +libcst==1.8.0 linkify-it-py==2.0.3 lockfile==0.12.2 -lxml==5.3.0 markdown-it-py==3.0.0 -marshmallow-oneofschema==3.1.1 -marshmallow-sqlalchemy==0.28.2 -marshmallow==3.22.0 -mdit-py-plugins==0.4.2 mdurl==0.1.2 methodtools==0.4.7 -more-itertools==10.5.0 -multidict==6.1.0 -numpy==1.26.4 -opentelemetry-api==1.16.0 -opentelemetry-exporter-otlp-proto-grpc==1.16.0 -opentelemetry-exporter-otlp-proto-http==1.16.0 -opentelemetry-exporter-otlp==1.16.0 -opentelemetry-exporter-prometheus==0.41b0 -opentelemetry-proto==1.16.0 -opentelemetry-sdk==1.16.0 -opentelemetry-semantic-conventions==0.37b0 -ordered-set==4.1.0 -packaging==24.1 -pandas==2.1.4 +more-itertools==10.7.0 +msgspec==0.19.0 +opentelemetry-api==1.27.0 +opentelemetry-exporter-otlp-proto-common==1.27.0 +opentelemetry-exporter-otlp-proto-grpc==1.27.0 +opentelemetry-exporter-otlp-proto-http==1.27.0 +opentelemetry-exporter-otlp==1.27.0 +opentelemetry-proto==1.27.0 +opentelemetry-sdk==1.27.0 +opentelemetry-semantic-conventions==0.48b0 +packaging==24.2 pathspec==0.12.1 -pendulum==3.0.0 -platformdirs==4.3.3 -pluggy==1.5.0 -plyvel==1.5.1 -prison==0.2.1 -prometheus_client==0.20.0 -protobuf==4.25.4 -psutil==6.0.0 -pure-sasl==0.6.2 -pyasn1==0.6.1 -pyasn1_modules==0.4.1 +pendulum==3.1.0 +pluggy==1.6.0 +protobuf==4.25.8 +psutil==7.0.0 pycparser==2.22 -pydantic==2.9.1 -pydantic_core==2.23.3 -pykerberos==1.2.4 -pyspnego==0.11.1 -python-daemon==3.0.1 +pydantic==2.11.5 +pydantic_core==2.33.2 +python-daemon==3.1.2 python-dateutil==2.9.0.post0 -python-ldap==3.4.4 -python-nvd3==0.16.0 +python-dotenv==1.1.0 +python-multipart==0.0.20 python-slugify==8.0.4 -python3-saml==1.16.0 -pytz==2024.2 -referencing==0.35.1 -requests-kerberos==0.15.0 -requests-toolbelt==1.0.0 +pytz==2025.2 +referencing==0.36.2 requests==2.32.3 -rfc3339-validator==0.1.4 -rich-argparse==1.5.2 -rich==13.8.1 -rpds-py==0.20.0 -s3fs==2024.9.0 -sentry-sdk==2.14.0 -setproctitle==1.3.3 -six==1.16.0 +retryhttp==1.3.3 +rich-argparse==1.7.1 +rich-toolkit==0.14.7 +rich==13.9.4 +rpds-py==0.25.1 +setproctitle==1.3.6 +shellingham==1.5.4 +six==1.17.0 sniffio==1.3.1 -sqlparse==0.5.1 -statsd==4.0.1 +sqlparse==0.5.3 +starlette==0.46.2 +structlog==25.4.0 +svcs==25.1.0 tabulate==0.9.0 -tenacity==9.0.0 -termcolor==2.4.0 +tenacity==9.1.2 +termcolor==3.1.0 text-unidecode==1.3 -thrift-sasl==0.4.3 -thrift==0.20.0 -time-machine==2.15.0 -typing_extensions==4.12.2 -tzdata==2024.1 +tomli==2.2.1 +typer==0.16.0 +types-requests==2.32.0.20250602 +typing-inspection==0.4.1 +typing_extensions==4.13.2 +tzdata==2025.2 uc-micro-py==1.0.3 -unicodecsv==0.14.1 -universal_pathlib==0.2.5 -urllib3==2.2.3 -uv==0.4.1 -vine==5.1.0 -virtualenv==20.26.4 -wirerope==0.4.7 -wrapt==1.16.0 -xmlsec==1.3.14 -yarl==1.11.1 -zipp==3.20.2 -zope.event==5.0 -zope.interface==7.0.3 +universal_pathlib==0.2.6 +urllib3==2.4.0 +uuid6==2024.7.10 +uv==0.7.8 +uvicorn==0.34.3 +uvloop==0.21.0 +watchfiles==1.0.5 +websockets==14.2 +wirerope==1.0.0 +wrapt==1.17.2 +zipp==3.22.0 diff --git a/dags/.airflowignore b/dags/.airflowignore index f2d4f0597..a76adb4b4 100644 --- a/dags/.airflowignore +++ b/dags/.airflowignore @@ -1,3 +1,4 @@ ^common_tasks.py$ ^custom_operators.py$ -^dag_functions.py$ \ No newline at end of file +^dag_functions.py$ +^dag_owners.py$ \ No newline 
at end of file diff --git a/dags/bluetooth_check_readers_temp.py b/dags/bluetooth_check_readers_temp.py index 89c8e44ca..a22507b7f 100644 --- a/dags/bluetooth_check_readers_temp.py +++ b/dags/bluetooth_check_readers_temp.py @@ -4,8 +4,7 @@ import pendulum from datetime import timedelta -from airflow.decorators import dag, task -from airflow.models import Variable +from airflow.sdk import dag, task from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator from airflow.providers.postgres.hooks.postgres import PostgresHook @@ -14,6 +13,7 @@ repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) # pylint: disable=wrong-import-position + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality from bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue # pylint: enable=import-error @@ -25,7 +25,7 @@ logging.basicConfig(level=logging.DEBUG) DAG_NAME = 'temp_bluetooth_check_readers' -DAG_OWNERS = Variable.get("dag_owners", deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) def format_br_list(returned_list): # Format broken reader list into a text for slack message. @@ -63,7 +63,7 @@ def blip_pipeline(): sql = '''SELECT (COUNT(*) > 0) AS "_check", 'There are no data inserted for '|| '{{ ds }}' AS msg FROM bluetooth.aggr_5min - WHERE datetime_bin >='{{ ds }}' and datetime_bin < '{{ tomorrow_ds }}' + WHERE datetime_bin >='{{ ds }}' and datetime_bin < '{{ macros.ds_add(ds, 1) }}' LIMIT 1''' ) diff --git a/dags/check_miovision.py b/dags/check_miovision.py deleted file mode 100644 index a44632782..000000000 --- a/dags/check_miovision.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Pipeline to run SQL data quality check on daily Miovision data pull. -Uses `miovision_api.determine_working_machine` to check if there are gaps of 4 hours or more for any camera. -Deprecated by `miovision_check` DAG. 
-""" -import sys -import os - -from airflow import DAG -from datetime import datetime, timedelta -from airflow.operators.python import PythonOperator -from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.models import Variable - -import logging -import pendulum - -repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -sys.path.insert(0, repo_path) -from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert - -LOGGER = logging.getLogger(__name__) -logging.basicConfig(level=logging.DEBUG) - -dag_name = 'check_miovision' - -#To connect to pgadmin bot -mio_bot = PostgresHook("miovision_api_bot") -con = mio_bot.get_conn() - -def check_miovision(con, start_date, end_date): - date_range = (start_date, end_date) - LOGGER.info('Check if cameras are working for date range = %s', date_range) - with con.cursor() as cur: - working_machine = '''SELECT miovision_api.determine_working_machine(%s::date, %s::date)''' - cur.execute(working_machine, date_range) - LOGGER.info(con.notices[-1]) - while True: - broken_flag = cur.fetchall() - if not broken_flag: # if broken_flag returns an empty list - break - LOGGER.info(broken_flag) - raise Exception ('A Miovision camera may be broken!') - -# Get slack member ids -dag_owners = Variable.get('dag_owners', deserialize_json=True) - -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown - -default_args = {'owner': ','.join(names), - 'depends_on_past':False, - 'start_date': pendulum.datetime(2020, 7, 10, tz="America/Toronto"), - 'end_date': pendulum.datetime(2023, 12, 21, tz="America/Toronto"), - 'email_on_failure': False, - 'email_on_success': False, - 'retries': 0, - 'retry_delay': timedelta(minutes=5), - 'on_failure_callback': task_fail_slack_alert - } - -dag = DAG(dag_id = dag_name, - default_args=default_args, - schedule='0 7 * * *', - catchup=False, - tags = ['miovision', "data_checks", "archived"], - doc_md=__doc__ -) -# Run at 7 AM local time every day - -task1 = PythonOperator( - task_id = 'check_miovision', - python_callable = check_miovision, - dag=dag, - op_kwargs={ - 'con': con, - # execution date is by default a day before if the process runs daily - 'start_date': '{{ ds }}', - 'end_date' : '{{ macros.ds_add(ds, 1) }}' - } -) \ No newline at end of file diff --git a/dags/check_rescu.py b/dags/check_rescu.py deleted file mode 100644 index 4599fee05..000000000 --- a/dags/check_rescu.py +++ /dev/null @@ -1,73 +0,0 @@ -import sys -import os -from functools import partial - -from airflow import DAG -from datetime import timedelta -from airflow.operators.python import PythonOperator -from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.models import Variable - -from psycopg2 import sql -import logging -import pendulum - -repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -sys.path.insert(0, repo_path) -from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert - -dag_name = 'rescu_check' - -LOGGER = logging.getLogger(__name__) -logging.basicConfig(level=logging.DEBUG) - -#To connect to pgadmin bot -rescu_bot = PostgresHook("rescu_bot") -con = rescu_bot.get_conn() - -def check_rescu(con, date_to_pull): - LOGGER.info('Pulling information for date = %s',date_to_pull) - with con.cursor() as cur: - check_raw = sql.SQL('''SELECT COUNT(raw_uid) FROM rescu.raw_15min WHERE dt = {}''').format(sql.Literal(date_to_pull)) - cur.execute(check_raw) - raw_num = cur.fetchone()[0] - LOGGER.info('There 
are %s rows of raw_date for %s', raw_num, date_to_pull) - - check_volume = sql.SQL('''SELECT COUNT(volume_uid) FROM rescu.volumes_15min WHERE datetime_bin::date = {}''').format(sql.Literal(date_to_pull)) - cur.execute(check_volume) - volume_num = cur.fetchone()[0] - LOGGER.info('There are %s rows of volume_15min for %s', volume_num, date_to_pull) - - if raw_num == 0 or raw_num < volume_num or volume_num < 7000: - raise Exception ('There is a PROBLEM here. There is no raw data OR raw_data is less than volume_15min OR volumes_15min is less than 7000 which is way too low') - -dag_owners = Variable.get('dag_owners', deserialize_json=True) - -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown - -default_args = {'owner': ','.join(names), - 'depends_on_past':False, - 'start_date': pendulum.datetime(2020, 4, 17, tz="America/Toronto"), - 'email_on_failure': False, - 'email_on_success': False, - 'retries': 0, - 'retry_delay': timedelta(minutes=5), - 'on_failure_callback': partial( - task_fail_slack_alert, - extra_msg="The total volume is too low. Either a lot of loop detectors are down or there's a problem in the pipeline." - ) - } - -dag = DAG(dag_id = dag_name, default_args=default_args, schedule='0 6 * * *', catchup=False) -# Run at 6 AM local time every day - -task1 = PythonOperator( - task_id = 'check_rescu', - python_callable = check_rescu, - dag=dag, - op_kwargs={ - 'con': con, - # execution date is by default a day before if the process runs daily - 'date_to_pull': '{{ ds }}' - } - ) diff --git a/dags/citywide_tti_aggregate.py b/dags/congestion_aggregate_temp.py similarity index 52% rename from dags/citywide_tti_aggregate.py rename to dags/congestion_aggregate_temp.py index 1257541a1..025662c0c 100644 --- a/dags/citywide_tti_aggregate.py +++ b/dags/congestion_aggregate_temp.py @@ -1,18 +1,17 @@ import sys import os +import logging +import pendulum +from datetime import timedelta -from datetime import datetime, timedelta +from airflow.sdk import dag from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models import Variable -from airflow.decorators import dag, task -from airflow.macros import ds_add, ds_format -import logging -import pendulum try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert + from dags.dag_owners import owners except: raise ImportError("Cannot import slack alert functions") @@ -21,17 +20,14 @@ doc_md = """ -### The Daily Citywide TTI Aggregation DAG +### The Daily TTI Aggregation DAG -This DAG runs daily to aggregate citywide TTI after the pull_here dag finishes. +This DAG runs daily to aggregate congestion segments daily (23_4) TEMPORARILY after the pull_here dag finishes. Slack notifications is raised when the airflow process fails. 
""" -DAG_NAME = 'citywide_tti_aggregate' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) - -# Slack alert -SLACK_CONN_ID = 'slack_data_pipeline' +DAG_NAME = 'congestion_aggregate_temp' +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) default_args = {'owner': ','.join(DAG_OWNERS), 'depends_on_past':False, @@ -44,25 +40,23 @@ } @dag( - DAG_NAME, + DAG_NAME, default_args=default_args, schedule=None, # gets triggered by HERE dag doc_md = doc_md, - tags=["HERE", "aggregation"], + tags=["HERE", "temp"], catchup=False ) +def congestion_aggregate_temp(): + aggregate_temp = SQLExecuteQueryOperator( + sql="SELECT congestion.generate_network_daily_temp_23_4( '{{macros.ds_add(ds, -1)}}' )", + task_id='aggregate_temp', + conn_id='congestion_bot', + autocommit=True, + retries = 0 + ) + + aggregate_temp -def citywide_tti_aggregate(): - -# Task to aggregate citwyide tti - - aggregate_daily = SQLExecuteQueryOperator(sql="SELECT covid.generate_citywide_tti( '{{macros.ds_add(ds, -1)}}' )", - task_id='aggregate_daily', - conn_id='congestion_bot', - autocommit=True, - retries = 0 - ) - aggregate_daily - -citywide_tti_aggregate() \ No newline at end of file +congestion_aggregate_temp() diff --git a/dags/dag_owners.py b/dags/dag_owners.py new file mode 100644 index 000000000..ee57af2eb --- /dev/null +++ b/dags/dag_owners.py @@ -0,0 +1,42 @@ +owners = { + "bluetooth_itsc_pull": ["gabe"], + "collisions_replicator": ["natalie", "gabe", "maddy"], + "congestion_aggregate_temp": ["natalie"], + "congestion_aggregation": ["natalie", "gabe"], + "congestion_refresh": ["natalie", "gabe"], + "counts_replicator": ["natalie", "gabe", "maddy"], + "ecocounter_check": ["gabe", "david"], + "ecocounter_open_data": ["gabe"], + "ecocounter_pull": ["gabe"], + "eoy_table_create": ["natalie", "gabe"], + "gcc_pull_layers": ["gabe", "natalie"], + "here_dynamic_binning_agg": ["gabe"], + "lake_shore_signal_timing_refresh": ["natalie", "gabe"], + "log_cleanup": ["natalie", "gabe"], + "miovision_alerts": ["gabe"], + "miovision_check": ["david", "gabe"], + "miovision_hardware": ["gabe"], + "miovision_open_data": ["gabe"], + "miovision_pull": ["natalie", "gabe"], + "monitor_db_size": ["gabe", "natalie"], + "open_data_checks": ["gabe"], + "pull_here": ["natalie"], + "pull_here_path": ["natalie"], + "queens_park_aggregation": ["natalie"], + "rapidto_aggregation": ["natalie"], + "replicator_table_check": ["gabe", "peter"], + "rodars_pull": ["gabe"], + "temp_bluetooth_check_readers": ["natalie", "gabe"], + "traffic_signals_dag": ["natalie", "gabe"], + "traffic_signal_pull": ["natalie"], + "tti_aggregate": ["natalie"], + "vds_check": ["gabe"], + "vds_pull_vdsdata": ["gabe"], + "vds_pull_vdsvehicledata": ["gabe"], + "vz_google_sheets": ["natalie", "gabe"], + "vz_map_checks": ["gabe"], + "weather_pull": ["natalie", "gabe"], + "wys_check": ["david", "gabe"], + "wys_monthly_summary": ["natalie", "gabe"], + "wys_pull": ["gabe", "natalie"] +} diff --git a/dags/ecocounter_check.py b/dags/ecocounter_check.py index de2213d44..71d4d9249 100644 --- a/dags/ecocounter_check.py +++ b/dags/ecocounter_check.py @@ -9,13 +9,13 @@ import pendulum from datetime import timedelta -from airflow.decorators import dag -from airflow.models import Variable -from airflow.sensors.external_task import ExternalTaskSensor +from airflow.sdk import dag +from airflow.providers.standard.sensors.external_task import ExternalTaskSensor try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) 
sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd from bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue except: @@ -25,7 +25,7 @@ logging.basicConfig(level=logging.INFO) DAG_NAME = 'ecocounter_check' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/ecocounter/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/ecocounter_open_data.py b/dags/ecocounter_open_data.py index 496499022..3d6a94915 100644 --- a/dags/ecocounter_open_data.py +++ b/dags/ecocounter_open_data.py @@ -8,17 +8,16 @@ import pendulum from functools import partial -from airflow.decorators import dag, task, task_group -from airflow.models import Variable -from airflow.hooks.base import BaseHook +from airflow.sdk.bases.hook import BaseHook from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.sensors.date_time import DateTimeSensor -from airflow.macros import ds_format -from airflow.operators.python import get_current_context +from airflow.providers.standard.sensors.date_time import DateTimeSensor +from airflow.sdk.execution_time.macros import ds_format +from airflow.sdk import dag, task, task_group, get_current_context, Variable try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from airflow3_bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, send_slack_msg, get_readme_docmd from airflow3_bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue except ModuleNotFoundError: @@ -30,7 +29,7 @@ logging.basicConfig(level=logging.DEBUG) DAG_NAME = 'ecocounter_open_data' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/ecocounter/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) @@ -89,7 +88,7 @@ def reminder_message(ds = None, **context): timeout=10*86400, mode="reschedule", poke_interval=3600*24, - target_time="{{ next_execution_date.replace(day=10) }}", + target_time="{{ data_interval_end.replace(day=10) }}", ) wait_till_10th.doc_md = """ Wait until the 10th day of the month to export data. Alternatively mark task as success to proceed immediately. 
diff --git a/dags/ecocounter_pull.py b/dags/ecocounter_pull.py index f71c8bd48..dd3f5bf12 100644 --- a/dags/ecocounter_pull.py +++ b/dags/ecocounter_pull.py @@ -13,18 +13,18 @@ import dateutil.parser import logging -from airflow.decorators import dag, task, task_group -from airflow.models import Variable -from airflow.hooks.base import BaseHook +from airflow.sdk import dag, task, task_group +from airflow.sdk.bases.hook import BaseHook from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.macros import ds_add +from airflow.sdk.execution_time.macros import ds_add from airflow.exceptions import AirflowSkipException -from airflow.sensors.external_task import ExternalTaskMarker +from airflow.providers.standard.sensors.external_task import ExternalTaskMarker try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import ( task_fail_slack_alert, slack_alert_data_quality, send_slack_msg, get_readme_docmd ) @@ -38,7 +38,7 @@ raise ImportError("Cannot import DAG helper functions.") DAG_NAME = 'ecocounter_pull' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/ecocounter/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/eoy_create_tables.py b/dags/eoy_create_tables.py index 55b4c63d2..7a62bc3fc 100644 --- a/dags/eoy_create_tables.py +++ b/dags/eoy_create_tables.py @@ -13,10 +13,9 @@ import logging from datetime import timedelta -from airflow.decorators import dag, task, task_group +from airflow.sdk import dag, task, task_group, Variable from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models import Variable LOGGER = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -24,13 +23,14 @@ try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, send_slack_msg from bluetooth.sql.bt_eoy_create_tables import replace_bt_trigger except: raise ImportError("Cannot import DAG helper functions.") DAG_NAME = 'eoy_table_create' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) default_args = {'owner': ','.join(DAG_OWNERS), 'depends_on_past':False, diff --git a/dags/gcc_layers_pull.py b/dags/gcc_layers_pull.py index 2a9e56b82..2d0341003 100644 --- a/dags/gcc_layers_pull.py +++ b/dags/gcc_layers_pull.py @@ -4,15 +4,15 @@ from functools import partial import pendulum -from airflow.sdk import dag, task, task_group, get_current_context, Variable -from airflow.providers.postgres.hooks.postgres import PostgresHook +from airflow.sdk import dag, task, task_group, get_current_context, Variable, Param from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models.param import Param +from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.exceptions import AirflowFailException try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) 
sys.path.insert(0, repo_path) + from dags.dag_owners import owners + from airflow3_bdit_dag_utils.utils.dag_functions import task_fail_slack_alert from gis.gccview.gcc_puller_functions import ( get_layer, mapserver_name, get_src_row_count, get_dest_row_count @@ -224,9 +224,9 @@ def trigger_aggs(conn_id, downstream_aggs): for item in filtered_dags: DAG_NAME = 'gcc_pull_layers' - DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) + DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) downstream_aggs = DAGS[item].get('downstream_aggs', []) - + DEFAULT_ARGS = { 'owner': ','.join(DAG_OWNERS), 'depends_on_past': False, diff --git a/dags/lake_shore_signal_timing_refresh.py b/dags/lake_shore_signal_timing_refresh.py new file mode 100644 index 000000000..5505fcd7f --- /dev/null +++ b/dags/lake_shore_signal_timing_refresh.py @@ -0,0 +1,87 @@ +import os +import sys +import logging +import pendulum +from datetime import timedelta + +from airflow.sdk import dag +from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator, SQLCheckOperator + +try: + repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) + sys.path.insert(0, repo_path) + from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert + from dags.dag_owners import owners +except ImportError: + raise ImportError("Cannot import DAG helper functions.") + +LOGGER = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG) + +DAG_NAME = 'lake_shore_signal_timing_refresh' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) + +default_args = {'owner': ','.join(DAG_OWNERS), + 'depends_on_past':False, + 'start_date': pendulum.datetime(2022, 3, 2, tz="America/Toronto"), + 'email_on_failure': False, + 'email_on_success': False, + 'retries': 0, + 'retry_delay': timedelta(minutes=5), + 'on_failure_callback': task_fail_slack_alert + } + +@dag( + dag_id=DAG_NAME, + default_args=default_args, + schedule='59 18 * * WED', # Every Wednesday at 7 PM, pull here runs at 4:30 PM + catchup=False, +) +def lake_shore_signal_timing_refresh(): + + ## Tasks ## + # SQLCheckOperator tasks to test if all of last week's data is in the database + # date set as ds - 2 because this task runs on a Wednesday and the input should be the previous Monday + check_here = SQLCheckOperator( + task_id = 'check_here', + conn_id='here_bot', + sql = ''' select case when count(distinct dt) = 7 then TRUE else FALSE end as counts + from here.ta + where dt >= '{{ macros.ds_add(ds, -2) }}'::timestamp and + dt < '{{ macros.ds_add(ds, -2) }}'::timestamp + interval '1 week';''' + ) + + check_bt = SQLCheckOperator( + task_id = 'check_bt', + conn_id='bt_bot', + sql = '''select case when count(distinct date(datetime_bin)) = 7 then TRUE else FALSE end as counts + from bluetooth.aggr_5min + where datetime_bin >= '{{ macros.ds_add(ds, -2) }}'::timestamp AND + datetime_bin < '{{ macros.ds_add(ds, -2) }}'::timestamp + interval '1 week';''' + ) + + ## Postgres Tasks ## + # Task to aggregate bluetooth data weekly + aggregate_bt = SQLExecuteQueryOperator( + sql='''select data_analysis.generate_lakeshore_weekly_bt('{{ macros.ds_add(ds, -2) }}')''', + task_id='aggregate_bt_weekly', + conn_id='bt_bot', + autocommit=True, + retries = 0 + ) + + # Task to aggregate here data weekly + aggregate_here = SQLExecuteQueryOperator( + sql='''select data_analysis.generate_lakeshore_weekly_here('{{ macros.ds_add(ds, -2) }}')''', + task_id='aggregate_here_weekly', + conn_id='here_bot', +
autocommit=True, + retries = 0 + ) + + ## Flow ## + # Once check tasks are marked as successful, aggregation task can be scheduled + check_here >> aggregate_here + check_bt >> aggregate_bt + +lake_shore_signal_timing_refresh() \ No newline at end of file diff --git a/dags/log_cleanup.py b/dags/log_cleanup.py index 38885db57..4082c4a70 100644 --- a/dags/log_cleanup.py +++ b/dags/log_cleanup.py @@ -8,7 +8,7 @@ import os import sys import pendulum -from airflow import DAG +from airflow.sdk import DAG, Variable AIRFLOW_DAGS = os.path.dirname(os.path.realpath(__file__)) AIRFLOW_ROOT = os.path.dirname(AIRFLOW_DAGS) @@ -16,8 +16,7 @@ AIRFLOW_TASKS_LIB = os.path.join(AIRFLOW_TASKS, 'lib') from airflow.configuration import conf -from airflow.operators.bash import BashOperator -from airflow.models import Variable +from airflow.providers.standard.operators.bash import BashOperator dag_name = 'log_cleanup' diff --git a/dags/miovision_check.py b/dags/miovision_check.py index 8af6b86f1..51c2570b4 100644 --- a/dags/miovision_check.py +++ b/dags/miovision_check.py @@ -9,13 +9,13 @@ import pendulum from datetime import timedelta -from airflow.decorators import dag -from airflow.models import Variable -from airflow.sensors.external_task import ExternalTaskSensor +from airflow.sdk import dag +from airflow.providers.standard.sensors.external_task import ExternalTaskSensor try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd from bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue except: @@ -25,7 +25,7 @@ logging.basicConfig(level=logging.INFO) DAG_NAME = 'miovision_check' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/miovision/api/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/miovision_hardware.py b/dags/miovision_hardware.py index c82eb68ed..4c6fcb0c3 100644 --- a/dags/miovision_hardware.py +++ b/dags/miovision_hardware.py @@ -3,13 +3,13 @@ import pendulum from datetime import timedelta -from airflow.decorators import dag, task -from airflow.models import Variable +from airflow.sdk import dag, task from airflow.providers.postgres.hooks.postgres import PostgresHook try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, get_readme_docmd from volumes.miovision.api.configuration_info import ( get_cameras, get_configuration_dates @@ -18,7 +18,7 @@ raise ImportError("Cannot import DAG helper functions.") DAG_NAME = 'miovision_hardware' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/miovision/api/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/miovision_pull.py b/dags/miovision_pull.py index 1684ac8a6..daf5309a6 100644 --- a/dags/miovision_pull.py +++ b/dags/miovision_pull.py @@ -8,17 +8,16 @@ import os import pendulum -from airflow.decorators import dag, task, task_group -from airflow.models.param import Param -from airflow.models import Variable +from 
airflow.sdk import dag, task, task_group, Param from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.sensors.external_task import ExternalTaskMarker +from airflow.providers.standard.sensors.external_task import ExternalTaskMarker from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.macros import ds_add +from airflow.sdk.execution_time.macros import ds_add try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd from bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue from bdit_dag_utils.utils.common_tasks import check_jan_1st, check_1st_of_month, wait_for_weather_timesensor @@ -31,7 +30,7 @@ raise ImportError("Cannot import DAG helper functions.") DAG_NAME = 'miovision_pull' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/miovision/api/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/open_data_checks.py b/dags/open_data_checks.py index b939a93e5..b2e07f1f9 100644 --- a/dags/open_data_checks.py +++ b/dags/open_data_checks.py @@ -15,20 +15,20 @@ from datetime import datetime, timedelta import dateutil.parser -from airflow.decorators import dag, task -from airflow.models import Variable +from airflow.sdk import dag, task, Variable from airflow.models.taskinstance import TaskInstance from airflow.exceptions import AirflowFailException LOGGER = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -DAG_NAME = 'open_data_checks' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ['Unknown']) - repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) -from dags.dag_functions import task_fail_slack_alert, send_slack_msg +from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, send_slack_msg +from dags.dag_owners import owners + +DAG_NAME = 'open_data_checks' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) default_args = { 'owner': ','.join(DAG_OWNERS), diff --git a/dags/pull_here.py b/dags/pull_here.py index 7c8e319c0..8cada93e3 100644 --- a/dags/pull_here.py +++ b/dags/pull_here.py @@ -5,17 +5,17 @@ import sys import os import pendulum - from datetime import timedelta -from airflow.hooks.base import BaseHook + +from airflow.sdk.bases.hook import BaseHook from airflow.operators.trigger_dagrun import TriggerDagRunOperator -from airflow.models import Variable -from airflow.decorators import dag, task, task_group -from airflow.macros import ds_add, ds_format +from airflow.sdk import dag, task, task_group, Variable +from airflow.sdk.execution_time.macros import ds_add, ds_format try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert from here.traffic.here_api import query_dates, get_access_token, get_download_url, HereAPIException except: @@ -32,8 +32,7 @@ """ dag_name = 'pull_here' -dag_owners = Variable.get('dag_owners', deserialize_json=True) -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown +names = 
owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown default_args = {'owner': ','.join(names), 'depends_on_past':False, diff --git a/dags/pull_here_path.py b/dags/pull_here_path.py index cfa8b47fb..fc15e72f4 100644 --- a/dags/pull_here_path.py +++ b/dags/pull_here_path.py @@ -3,14 +3,14 @@ import pendulum from datetime import timedelta -from airflow.decorators import task, dag -from airflow.hooks.base import BaseHook -from airflow.models import Variable -from airflow.macros import ds_add, ds_format +from airflow.sdk import task, dag +from airflow.sdk.bases.hook import BaseHook +from airflow.sdk.execution_time.macros import ds_add, ds_format try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert from here.traffic.here_api import get_access_token, get_download_url from here.traffic.here_api_path import query_dates @@ -27,8 +27,7 @@ """ dag_name = 'pull_here_path' -dag_owners = Variable.get('dag_owners', deserialize_json=True) -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown +names = owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown default_args = {'owner': ','.join(names), 'depends_on_past':False, diff --git a/dags/pull_interventions_dag.py b/dags/pull_interventions_dag.py deleted file mode 100644 index 1968911a1..000000000 --- a/dags/pull_interventions_dag.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Pipeline to pull CurbTO Intervetions daily and put them into postgres tables using Bash Operator. -Slack notifications is raised when the airflow process fails. -""" -import sys -import os - -import pendulum -from airflow import DAG -from datetime import timedelta -from airflow.operators.bash import BashOperator -from airflow.models import Variable - -repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -sys.path.insert(0, repo_path) -from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert - -dag_name = 'automate_interventions' - -dag_owners = Variable.get('dag_owners', deserialize_json=True) - -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown - -default_args = {'owner':names, - 'depends_on_past':False, - 'start_date': pendulum.datetime(2020, 5, 26, tz="America/Toronto"), - 'email_on_failure': False, - 'email_on_success': False, - 'retries': 0, - 'retry_delay': timedelta(minutes=5), - 'on_failure_callback': task_fail_slack_alert - } - -dag = DAG(dag_id = dag_name, default_args = default_args, schedule = '0 0 * * *') - - -t1 = BashOperator( - task_id = 'pull_interventions', - bash_command = '''/etc/airflow/data_scripts/.venv/bin/python3 /etc/airflow/data_scripts/gis/gccview/pull_interventions.py --mapserver='ActiveTO_and_CurbTO_View' --id=0''', - retries = 0, - dag=dag) \ No newline at end of file diff --git a/dags/pull_weather.py b/dags/pull_weather.py deleted file mode 100644 index 7da644bb3..000000000 --- a/dags/pull_weather.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Pipeline to pull weather prediction data from Envrionment Canada and upsert into weather.prediction_daily table. -Then, -A Slack notification is raised when the airflow process fails. 
-""" -import os -import sys -import pendulum -from airflow import DAG -from datetime import timedelta -from airflow.operators.python import PythonOperator -from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.models import Variable -from airflow.operators.latest_only import LatestOnlyOperator - -# DAG Information -dag_name = 'pull_weather' - -#connection credentials -cred = PostgresHook("weather_bot") - -#import python scripts -try: - repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) - sys.path.insert(0, repo_path) - from weather.prediction_import import prediction_upsert - from weather.historical_scrape import historical_upsert - from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert -except: - raise ImportError("script import failed") - - -dag_owners = Variable.get('dag_owners', deserialize_json=True) - -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown - -#DAG - -default_args = { - 'owner': 'Natalie', - 'depends_on_past':False, - 'start_date': pendulum.datetime(2022, 11, 8, tz="America/Toronto"), - 'end_date': pendulum.datetime(2024, 6, 3, tz="America/Toronto"), - 'email_on_failure': False, - 'email_on_success': False, - 'retries': 0, - 'retry_delay': timedelta(minutes=5), - 'on_failure_callback': task_fail_slack_alert -} - -dag = DAG( - dag_id = dag_name, - default_args=default_args, - schedule='30 10 * * *', - tags=['weather', 'data_pull'], - catchup=False) - -#=======================================# -# Pull predicted weather data - can ONLY pull 5 days ahead of run date - no backfill. -no_backfill = LatestOnlyOperator(task_id="no_backfill", dag=dag) - -# TASKS - -## Pull weather forcast for 5 days ahead of run date -PULL_PREDICTION = PythonOperator( - task_id = 'pull_prediction', - python_callable = prediction_upsert, - dag=dag, - op_args=[cred] -) - -## Pull yesterday's historical data for Toronto city centre -PULL_HISTORICAL_CITY = PythonOperator( - task_id = 'pull_historical_city', - python_callable = historical_upsert, - dag=dag, - op_args=[cred, '{{ yesterday_ds }}', 31688] -) -## Pull yesterday's historical data for Toronto Peason Airport -PULL_HISTORICAL_AIRPORT = PythonOperator( - task_id = 'pull_historical_airport', - python_callable = historical_upsert, - dag=dag, - op_args=[cred, '{{ yesterday_ds }}', 51459] -) - -no_backfill >> PULL_PREDICTION -PULL_HISTORICAL_CITY -PULL_HISTORICAL_AIRPORT diff --git a/dags/queen_park_refresh.py b/dags/queen_park_refresh.py new file mode 100644 index 000000000..d271a9c75 --- /dev/null +++ b/dags/queen_park_refresh.py @@ -0,0 +1,73 @@ +import os +import sys +import logging +import pendulum +from datetime import datetime, timedelta + +#from airflow.sdk import dag, task #also uncomment task_fail_slack_alert! 
+from airflow.decorators import dag, task +from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator, SQLCheckOperator + +try: + repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) + sys.path.insert(0, repo_path) + from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert + from dags.dag_owners import owners +except ImportError: + raise ImportError("Cannot import DAG helper functions.") + +LOGGER = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG) + +DAG_NAME = 'queens_park_aggregation' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) + +default_args = {'owner': ','.join(DAG_OWNERS), + 'depends_on_past':False, + 'start_date': pendulum.datetime(2022, 9, 27, tz="America/Toronto"), + 'email_on_failure': False, + 'email_on_success': False, + 'retries': 0, + 'retry_delay': timedelta(minutes=5), + #'on_failure_callback': task_fail_slack_alert + } + +@dag( + dag_id = DAG_NAME, + default_args=default_args, + schedule=None, # gets triggered by here dag + catchup=False, +) +def queen_park_refresh(): + + ## ShortCircuitOperator Tasks, python_callable returns True or False; False means skip downstream tasks + @task.short_circuit() + def check_dow(ds=None): + execution_date = datetime.strptime(ds, "%Y-%m-%d") - timedelta(days=1) + return execution_date.weekday() == 0 + + ## Postgres Tasks + # Task to check that a full week of HERE data is present before aggregating + check_weekly = SQLCheckOperator( + task_id = 'check_weekly', + conn_id = 'here_bot', + sql = '''SELECT case when count(distinct dt) = 7 + THEN TRUE ELSE FALSE + END AS counts + FROM here.ta + WHERE + dt >= '{{ ds }}'::date - 8 + AND dt < '{{ ds }}'::date - 1''' + ) + + aggregate_queens_park = SQLExecuteQueryOperator( + sql='''select data_analysis.generate_queens_park_weekly('{{ macros.ds_add(ds, -8) }}')''', + task_id='aggregate_queens_park', + conn_id='here_bot', + autocommit=True, + retries = 0 + ) + + check_dow() >> check_weekly >> aggregate_queens_park + +queen_park_refresh() \ No newline at end of file diff --git a/dags/rapidto_aggregation.py b/dags/rapidto_aggregation.py new file mode 100644 index 000000000..239b4c65c --- /dev/null +++ b/dags/rapidto_aggregation.py @@ -0,0 +1,64 @@ +""" +airflow_rapidto_aggregation + +A data aggregation workflow that aggregates travel time data for +RapidTO, inserting hourly travel times for each report_id +into rapidto.report_daily_hr_summary daily. +This DAG is scheduled to run only when the pull_here DAG +finishes running.
+""" +import os +import sys +import logging +import pendulum +from datetime import timedelta + +from airflow.sdk import dag +from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator + +try: + repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) + sys.path.insert(0, repo_path) + from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert + from dags.dag_owners import owners +except ImportError: + raise ImportError("Cannot import DAG helper functions.") + +LOGGER = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG) + +DAG_NAME = 'rapidto_aggregation' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) + +default_args = {'owner': ','.join(DAG_OWNERS), + 'depends_on_past':False, + 'start_date': pendulum.datetime(2023, 4, 1, tz="America/Toronto"), + 'email_on_failure': False, + 'email_on_success': False, + 'retries': 0, + 'retry_delay': timedelta(minutes=5), + 'on_failure_callback': task_fail_slack_alert + } + +@dag( + dag_id = DAG_NAME, + default_args=default_args, + schedule=None, #gets triggered by here dag + catchup=False, + tags=["HERE"] +) +def rapidto_aggregation(): + ## Postgres Tasks + # Task to aggregate segment level tt daily + aggregate_daily = SQLExecuteQueryOperator( + sql='''SELECT rapidto.generate_daily_hr_agg('{{ macros.ds_add(ds, -1) }}')''', + task_id='aggregate_daily', + conn_id='natalie', + autocommit=True, + retries = 0 + ) + + #wait_for_here >> + aggregate_daily + +rapidto_aggregation() \ No newline at end of file diff --git a/dags/refresh_wys_monthly.py b/dags/refresh_wys_monthly.py deleted file mode 100644 index 0bcedb86e..000000000 --- a/dags/refresh_wys_monthly.py +++ /dev/null @@ -1,91 +0,0 @@ -""" -Refresh WYS Materialized Views and run monthly aggregation function for Open Data. -A Slack notification is raised when the airflow process fails. 
-""" -import sys -import os -import pendulum - -from airflow import DAG -from datetime import datetime, timedelta -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models import Variable -from dateutil.relativedelta import relativedelta - -repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -sys.path.insert(0, repo_path) -from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, get_readme_docmd - -dag_name = 'wys_monthly_summary' -dag_owners = Variable.get('dag_owners', deserialize_json=True) -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown - -README_PATH = os.path.join(repo_path, 'wys/api/readme.md') -DOC_MD = get_readme_docmd(README_PATH, dag_name) - -default_args = {'owner': ','.join(names), - 'depends_on_past':False, - 'start_date': pendulum.datetime(2020, 4, 30, tz="America/Toronto"), - 'email_on_failure': False, - 'email_on_success': False, - 'retries': 0, - 'retry_delay': timedelta(minutes=5), - 'on_failure_callback': task_fail_slack_alert - } - -def last_month(ds): - dt = datetime.strptime(ds, "%Y-%m-%d") - # Don't need to add `months=1` because that seems to be taken care of with - # the monthly scheduling - return (dt - relativedelta(day=1)).strftime("%Y-%m-%d") - -with DAG(dag_id = dag_name, - default_args=default_args, - max_active_runs=1, - tags=["wys", "aggregation", "monthly"], - doc_md = DOC_MD, - user_defined_macros={ - 'last_month' : last_month - }, - schedule='0 3 2 * *') as monthly_summary: - wys_view_stat_signs = SQLExecuteQueryOperator( - #sql in bdit_data-sources/wys/api/sql/mat-view-stationary-signs.sql - sql='SELECT wys.refresh_mat_view_stationary_signs()', - task_id='wys_view_stat_signs', - conn_id='wys_bot', - autocommit=True, - retries = 0) - wys_view_mobile_api_id = SQLExecuteQueryOperator( - #sql in bdit_data-sources/wys/api/sql/function-refresh_mat_view_mobile_api_id.sql - #sql in bdit_data-sources/wys/api/sql/create-view-mobile_api_id.sql - sql='SELECT wys.refresh_mat_view_mobile_api_id()', - task_id='wys_view_mobile_api_id', - conn_id='wys_bot', - autocommit=True, - retries = 0) - od_wys_view = SQLExecuteQueryOperator( - #sql in bdit_data-sources/wys/api/sql/open_data/mat-view-stationary-locations.sql - sql='SELECT wys.refresh_od_mat_view()', - task_id='od_wys_view', - conn_id='wys_bot', - autocommit=True, - retries = 0) - wys_mobile_summary = SQLExecuteQueryOperator( - #sql in bdit_data-sources/wys/api/sql/function-mobile-summary.sql - sql="SELECT wys.mobile_summary_for_month('{{ last_month(ds) }}')", - task_id='wys_mobile_summary', - conn_id='wys_bot', - autocommit=True, - retries = 0) - wys_stat_summary = SQLExecuteQueryOperator( - #sql in bdit_data-sources/wys/api/sql/function-stationary-sign-summary.sql - sql="SELECT wys.stationary_summary_for_month('{{ last_month(ds) }}')", - task_id='wys_stat_summary', - conn_id='wys_bot', - autocommit=True, - retries = 0) - - # Stationary signs - wys_view_stat_signs >> [wys_stat_summary, od_wys_view] - # Mobile signs - wys_view_mobile_api_id >> wys_mobile_summary \ No newline at end of file diff --git a/dags/replicator_table_check.py b/dags/replicator_table_check.py index 00526f043..7197b7437 100644 --- a/dags/replicator_table_check.py +++ b/dags/replicator_table_check.py @@ -11,22 +11,23 @@ import os import sys import pendulum + # pylint: disable=import-error -from airflow.decorators import dag, task -from airflow.models import Variable +from airflow.sdk import dag, task, Variable from 
airflow.exceptions import AirflowFailException from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.operators.latest_only import LatestOnlyOperator +from airflow.providers.standard.operators.latest_only import LatestOnlyOperator # import custom operators and helper functions repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners # pylint: disable=wrong-import-position from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, get_readme_docmd # pylint: enable=import-error DAG_NAME = 'replicator_table_check' -DAG_OWNERS = Variable.get("dag_owners", deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'collisions/Readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/replicators.py b/dags/replicators.py index 57dda0e4a..f0f2cb49f 100644 --- a/dags/replicators.py +++ b/dags/replicators.py @@ -6,15 +6,15 @@ from functools import partial from datetime import timedelta import pendulum + # pylint: disable=import-error -from airflow.decorators import dag, task, task_group -from airflow.models import Variable +from airflow.sdk import dag, task, task_group, get_current_context, Variable from airflow.exceptions import AirflowFailException -from airflow.operators.python import get_current_context # import custom operators and helper functions repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners # pylint: disable=wrong-import-position from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, send_slack_msg, check_not_empty # pylint: enable=import-error @@ -104,7 +104,7 @@ def status_message(tables, **context): #generate replicator DAGs from dict for replicator, dag_items in REPLICATORS.items(): DAG_NAME = dag_items['dag_name'] - DAG_OWNERS = Variable.get("dag_owners", deserialize_json=True).get(DAG_NAME, ["Unknown"]) + DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) default_args = { "owner": ",".join(DAG_OWNERS), diff --git a/dags/rodars_pull.py b/dags/rodars_pull.py index afde3753b..2d5995c67 100644 --- a/dags/rodars_pull.py +++ b/dags/rodars_pull.py @@ -3,20 +3,21 @@ import pendulum from functools import partial -from airflow.sdk import dag, task, task_group, Variable +from airflow.sdk import dag, task, task_group from airflow.providers.postgres.hooks.postgres import PostgresHook -DAG_NAME = 'rodars_pull' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ['Unknown']) - repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners from events.road_permits.rodars_functions import ( fetch_and_insert_issue_data, fetch_and_insert_location_data ) -from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd -from bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue +from airflow3_bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd +from airflow3_bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue + +DAG_NAME = 'rodars_pull' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) README_PATH = os.path.join(repo_path, 'events/road_permits/readme.md') DOC_MD = get_readme_docmd(README_PATH, 
DAG_NAME) diff --git a/dags/assets_pull.py b/dags/traffic_signals_dag.py similarity index 90% rename from dags/assets_pull.py rename to dags/traffic_signals_dag.py index b455f7d3d..250ed7b4b 100644 --- a/dags/assets_pull.py +++ b/dags/traffic_signals_dag.py @@ -22,39 +22,22 @@ from psycopg2 import sql import requests from psycopg2.extras import execute_values -from airflow import DAG -from airflow.operators.python import PythonOperator -from airflow.models import Variable - from dateutil.parser import parse -from datetime import datetime import pendulum -dag_name = 'traffic_signals_dag' - -# Credentials +from airflow.sdk import dag, task from airflow.providers.postgres.hooks.postgres import PostgresHook -vz_cred = PostgresHook("vz_api_bot") # name of Conn Id defined in UI -#vz_pg_uri = vz_cred.get_uri() # connection to RDS for psql via BashOperator -# ------------------------------------------------------------------------------ # Slack notification repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert -dag_owners = Variable.get('dag_owners', deserialize_json=True) - -names = dag_owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown - - -# ------------------------------------------------------------------------------ -AIRFLOW_DAGS = os.path.dirname(os.path.realpath(__file__)) -AIRFLOW_ROOT = os.path.dirname(AIRFLOW_DAGS) -AIRFLOW_TASKS = os.path.join(AIRFLOW_ROOT, 'assets/rlc/airflow/tasks') +DAG_NAME = 'traffic_signals_dag' +names = owners.get(DAG_NAME, ['Unknown']) #find dag owners w/default = Unknown DEFAULT_ARGS = { - 'email': ['Cathy.Nangini@toronto.ca'], 'email_on_failure': True, 'email_on_retry': True, 'owner': 'airflow', @@ -64,7 +47,7 @@ } # ------------------------------------------------------------------------------ -def pull_rlc(): +def pull_rlc(vz_cred): ''' Connect to bigdata RDS, pull Red Light Camera json file from Open Data API, and overwrite existing rlc table in the vz_safety_programs_staging schema. @@ -149,7 +132,7 @@ def insert_data(conn, col_names, rows, ts_type): execute_values(cur, insert_query, rows) # ------------------------------------------------------------------------------ -def pull_aps(): +def pull_aps(vz_cred): ''' Pulls Accessible Pedestrian Signals @@ -188,7 +171,7 @@ def pull_aps(): insert_data(conn, col_names, rows, 'Audible Pedestrian Signals') # ------------------------------------------------------------------------------ -def pull_pxo(): +def pull_pxo(vz_cred): ''' Pulls Pedestrian Crossovers ''' @@ -275,7 +258,7 @@ def lastest_imp_date(obj): formatted_date = latest_date.strftime("%Y-%m-%d") return formatted_date -def pull_lpi(): +def pull_lpi(vz_cred): ''' Pulls Pedestrian Head Start Signals/Leading Pedestrian Intervals ''' @@ -334,7 +317,7 @@ def identify_temp_signals(px): return 'Temporary' return None -def pull_traffic_signal(): +def pull_traffic_signal(vz_cred): ''' This function would pull all records from https://secure.toronto.ca/opendata/cart/traffic_signals/v3?format=json into the bigdata database. 
One copy will be in vz_safety_programs_staging.signals_cart while another will be in @@ -432,41 +415,43 @@ def pull_traffic_signal(): # ------------------------------------------------------------------------------ # Set up the dag and task -TRAFFIC_SIGNALS_DAG = DAG( - dag_id = dag_name, +@dag( + dag_id=DAG_NAME, default_args=DEFAULT_ARGS, max_active_runs=1, - template_searchpath=[os.path.join(AIRFLOW_ROOT, 'assets/rlc/airflow/tasks')], tags=["bdit_data-sources", "data_pull", "traffic_signals"], - schedule='0 4 * * 1-5') - # minutes past each hour | Hours (0-23) | Days of the month (1-31) | Months (1-12) | Days of the week (0-7, Sunday represented as either/both 0 and 7) - -PULL_RLC = PythonOperator( - task_id='pull_rlc', - python_callable=pull_rlc, - dag=TRAFFIC_SIGNALS_DAG -) - -PULL_APS = PythonOperator( - task_id='pull_aps', - python_callable=pull_aps, - dag=TRAFFIC_SIGNALS_DAG -) - -PULL_PXO = PythonOperator( - task_id='pull_pxo', - python_callable=pull_pxo, - dag=TRAFFIC_SIGNALS_DAG -) - -PULL_LPI = PythonOperator( - task_id='pull_lpi', - python_callable=pull_lpi, - dag=TRAFFIC_SIGNALS_DAG -) - -PULL_TS = PythonOperator( - task_id='pull_ts', - python_callable=pull_traffic_signal, - dag=TRAFFIC_SIGNALS_DAG + schedule='0 4 * * 1-5' #4am, monday-friday ) +def traffic_signals_dag(): + @task(task_id="pull_rlc") + def pull_rlc_task(): + vz_cred = PostgresHook("vz_api_bot") + pull_rlc(vz_cred) + + @task(task_id="pull_aps") + def pull_aps_task(): + vz_cred = PostgresHook("vz_api_bot") + pull_aps(vz_cred) + + @task(task_id="pull_pxo") + def pull_pxo_task(): + vz_cred = PostgresHook("vz_api_bot") + pull_pxo(vz_cred) + + @task(task_id="pull_lpi") + def pull_lpi_task(): + vz_cred = PostgresHook("vz_api_bot") + pull_lpi(vz_cred) + + @task(task_id="pull_ts") + def pull_ts_task(): + vz_cred = PostgresHook("vz_api_bot") + pull_traffic_signal(vz_cred) + + pull_rlc_task() + pull_aps_task() + pull_pxo_task() + pull_lpi_task() + pull_ts_task() + +traffic_signals_dag() \ No newline at end of file diff --git a/dags/tti_aggregate.py b/dags/tti_aggregate.py index a3b30fe9d..70230c1f0 100644 --- a/dags/tti_aggregate.py +++ b/dags/tti_aggregate.py @@ -1,17 +1,17 @@ import sys import os - +import logging +import pendulum from datetime import datetime, timedelta + from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models import Variable -from airflow.decorators import dag, task -from airflow.macros import ds_add, ds_format +from airflow.sdk import dag, task +from airflow.sdk.execution_time.macros import ds_add, ds_format -import logging -import pendulum try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert except: raise ImportError("Cannot import slack alert functions") @@ -28,7 +28,7 @@ """ DAG_NAME = 'tti_aggregate' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) default_args = {'owner': ','.join(DAG_OWNERS), 'depends_on_past':False, diff --git a/dags/vds_check.py b/dags/vds_check.py index 18e3eb7cd..da4e90afb 100644 --- a/dags/vds_check.py +++ b/dags/vds_check.py @@ -10,14 +10,15 @@ from datetime import timedelta from functools import partial -from airflow.decorators import dag -from airflow.models import Variable -from airflow.sensors.external_task import ExternalTaskSensor +from airflow.sdk import dag 
try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) - from airflow3_bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd + from dags.dag_owners import owners + from airflow3_bdit_dag_utils.utils.dag_functions import ( + task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd + ) from airflow3_bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue except: raise ImportError("Cannot import DAG helper functions.") @@ -26,7 +27,7 @@ logging.basicConfig(level=logging.INFO) DAG_NAME = 'vds_check' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'volumes/vds/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) @@ -53,18 +54,8 @@ ) def vds_check_dag(): - t_upstream_done = ExternalTaskSensor( - task_id="starting_point", - external_dag_id="vds_pull_vdsdata", - external_task_id="done", - poke_interval=3600, #retry hourly - mode="reschedule", - timeout=86400, #one day - execution_delta=timedelta(days=-6, hours=1) #pull_vds scheduled at '0 4 * * *' - ) - check_missing_centreline_id = SQLCheckOperatorWithReturnValue( - on_failure_callback=slack_alert_data_quality, + on_failure_callback=partial(slack_alert_data_quality, use_proxy=True), task_id="check_missing_centreline_id", sql="select-missing_centreline.sql", conn_id="vds_bot" @@ -74,7 +65,7 @@ def vds_check_dag(): ''' check_missing_expected_bins = SQLCheckOperatorWithReturnValue( - on_failure_callback=slack_alert_data_quality, + on_failure_callback=partial(slack_alert_data_quality, use_proxy=True), task_id="check_missing_expected_bins", sql="select-missing_expected_bins.sql", conn_id="vds_bot" @@ -83,9 +74,7 @@ def vds_check_dag(): Identify intersections which appeared within the lookback period that did not appear today. 
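Across these DAG files the owner lookup moves from a deserialized `dag_owners` Airflow Variable to a plain module imported from dags/dag_owners.py, so the mapping is available while the file is parsed without a Variable fetch at top level. The module's contents are not part of this diff; given the `owners.get(DAG_NAME, ["Unknown"])` calls it is presumably just a dict keyed by dag id, roughly like the hypothetical sketch below (dag ids taken from the diff, owner names are placeholders):

# dags/dag_owners.py -- hypothetical contents; only the owners.get() usage appears in the diff
owners = {
    "vds_check": ["owner_a"],
    "vds_pull_vdsdata": ["owner_a", "owner_b"],
    "weather_pull": ["owner_c"],
}

# In each DAG file:
# from dags.dag_owners import owners
# DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"])  # falls back to "Unknown" when the dag id is missing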
''' - t_upstream_done >> [ - check_missing_centreline_id, - check_missing_expected_bins - ] - + check_missing_centreline_id, + check_missing_expected_bins + vds_check_dag() \ No newline at end of file diff --git a/dags/vds_pull_vdsdata.py b/dags/vds_pull_vdsdata.py index 07fb0f5b7..f1eb613a9 100755 --- a/dags/vds_pull_vdsdata.py +++ b/dags/vds_pull_vdsdata.py @@ -3,23 +3,25 @@ from datetime import datetime, timedelta from functools import partial -from airflow.sdk import dag, task_group, task, Variable +from airflow.sdk import dag, task_group, task, TriggerRule from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.providers.standard.sensors.external_task import ExternalTaskMarker - -DAG_NAME = 'vds_pull_vdsdata' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ['Unknown']) +from airflow.providers.standard.operators.empty import EmptyOperator +from airflow.operators.trigger_dagrun import TriggerDagRunOperator repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners from volumes.vds.py.vds_functions import ( pull_raw_vdsdata, pull_detector_inventory, pull_entity_locations, pull_commsdeviceconfig ) from airflow3_bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd from airflow3_bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue -from airflow3_bdit_dag_utils.utils.common_tasks import check_jan_1st, wait_for_weather_timesensor +from airflow3_bdit_dag_utils.utils.common_tasks import check_jan_1st, wait_for_weather_timesensor, check_if_dow + +DAG_NAME = 'vds_pull_vdsdata' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) README_PATH = os.path.join(repo_path, 'volumes/vds/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) @@ -77,10 +79,11 @@ def pull_and_insert_commsdeviceconfig(): vds_bot = PostgresHook('vds_bot') pull_commsdeviceconfig(rds_conn = vds_bot, itsc_conn=itsc_bot) - t_done = ExternalTaskMarker( - task_id="done", - external_dag_id="vds_pull_vdsvehicledata", - external_task_id="starting_point" + t_done = TriggerDagRunOperator( + task_id="done", + trigger_dag_id="vds_pull_vdsvehicledata", + logical_date="{{ ds }}", + reset_dag_run=True ) [ @@ -172,21 +175,32 @@ def summarize_v15(): ) (summarize_v15_task, summarize_v15_bylane_task) >> summarize_volmes_daily_task + + @task.short_circuit(ignore_downstream_trigger_rules=False) + def check_dow(ds=None): + execution_date = datetime.strptime(ds, "%Y-%m-%d") - timedelta(days=1) + return execution_date.weekday() == 4 #only notify on Thursdays - t_done = ExternalTaskMarker( + t_done = TriggerDagRunOperator( task_id="done", - external_dag_id="vds_check", - external_task_id="starting_point" + trigger_dag_id="vds_check", + logical_date="{{ ds }}", + reset_dag_run=True ) - - @task_group + + t_empty = EmptyOperator( + task_id="continue", + trigger_rule=TriggerRule.NONE_FAILED + ) + + @task_group() def data_checks(): "Data quality checks which may warrant re-running the DAG." 
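The cross-DAG wiring also changes shape: instead of an ExternalTaskMarker in the upstream DAG paired with an ExternalTaskSensor in the downstream one, the upstream DAG now ends in a TriggerDagRunOperator and the downstream DAGs drop their sensors (vds_pull_vdsvehicledata additionally switches to schedule=None in the hunk that follows). A stripped-down sketch of that handoff, using the same operator arguments as the hunk and the import path the hunk uses; dag ids, schedule, and start dates are illustrative:

import pendulum
from airflow.sdk import dag, task
from airflow.operators.trigger_dagrun import TriggerDagRunOperator  # import path as used in the hunk

@dag(dag_id="upstream_sketch", schedule="0 4 * * *",
     start_date=pendulum.datetime(2024, 1, 1, tz="America/Toronto"))
def upstream_sketch():
    @task
    def pull():
        ...

    trigger_child = TriggerDagRunOperator(
        task_id="done",
        trigger_dag_id="downstream_sketch",
        logical_date="{{ ds }}",   # hand the child run the same logical date
        reset_dag_run=True,        # clear an existing run for that date instead of failing
    )
    pull() >> trigger_child

@dag(dag_id="downstream_sketch", schedule=None,   # only runs when triggered upstream
     start_date=pendulum.datetime(2024, 1, 1, tz="America/Toronto"))
def downstream_sketch():
    @task
    def process():
        ...
    process()

upstream_sketch()
downstream_sketch()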
divisions = [2, ] #div8001 is never summarized and the query on the view is not optimized for divid in divisions: check_avg_rows = SQLCheckOperatorWithReturnValue( - on_failure_callback=slack_alert_data_quality, + on_failure_callback=partial(slack_alert_data_quality, use_proxy=True), task_id=f"check_rows_vdsdata_div{divid}", sql="select-row_count_lookback.sql", conn_id='vds_bot', @@ -205,8 +219,9 @@ def data_checks(): check_partitions() ] >> pull_vdsdata() >> - summarize_v15() >> t_done >> - data_checks() + summarize_v15() >> + check_dow() >> t_done >> + t_empty >> data_checks() ] vdsdata_dag() \ No newline at end of file diff --git a/dags/vds_pull_vdsvehicledata.py b/dags/vds_pull_vdsvehicledata.py index 668cbc390..72e15f01b 100755 --- a/dags/vds_pull_vdsvehicledata.py +++ b/dags/vds_pull_vdsvehicledata.py @@ -1,31 +1,32 @@ import os import sys -from airflow.decorators import dag, task_group, task +import pendulum +from functools import partial from datetime import datetime, timedelta + +from airflow.sdk import dag, task_group, task from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models import Variable -from functools import partial -from airflow.sensors.external_task import ExternalTaskSensor - -DAG_NAME = 'vds_pull_vdsvehicledata' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ['Unknown']) repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners from volumes.vds.py.vds_functions import pull_raw_vdsvehicledata from airflow3_bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd from airflow3_bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue from airflow3_bdit_dag_utils.utils.common_tasks import check_jan_1st, wait_for_weather_timesensor +DAG_NAME = 'vds_pull_vdsvehicledata' +DAG_OWNERS = owners.get(DAG_NAME, ['Unknown']) + README_PATH = os.path.join(repo_path, 'volumes/vds/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) default_args = { 'owner': ','.join(DAG_OWNERS), 'depends_on_past': False, - 'start_date': datetime(2021, 11, 1), + 'start_date': pendulum.datetime(2021, 11, 1, tz="America/Toronto"), 'email_on_failure': False, 'email_on_retry': False, 'retries': 5, @@ -45,20 +46,10 @@ ], doc_md=DOC_MD, tags=["bdit_data-sources", 'vds', 'vdsvehicledata', 'data_checks', 'data_pull'], - schedule='5 4 * * *' #daily at 4:05am + schedule=None #triggered by vds_pull_vdsdata ) def vdsvehicledata_dag(): - t_upstream_done = ExternalTaskSensor( - task_id="starting_point", - external_dag_id="vds_pull_vdsdata", - external_task_id="update_inventories.done", - poke_interval=3600, #retry hourly - mode="reschedule", - timeout=86400, #one day - execution_delta=timedelta(minutes=5) - ) - #this task group checks if all necessary partitions exist and if not executes create functions. @task_group(group_id='check_partitions') def check_partitions_TG(): @@ -132,7 +123,7 @@ def data_checks(): "Data quality checks which may warrant re-running the DAG." 
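Each data-quality check now builds its failure callback with functools.partial, so the shared slack_alert_data_quality helper receives use_proxy=True in addition to the task context Airflow passes to every on_failure_callback. The same technique works for any callback that needs extra keyword arguments; a small sketch with a stand-in helper (not the repo's implementation):

from functools import partial

def slack_alert_stub(context, use_proxy=False):
    # Stand-in for bdit_dag_utils' slack_alert_data_quality: Airflow supplies
    # `context` when the task fails; partial() pre-binds the extra keyword.
    ti = context["task_instance"]
    print(f"data check {ti.task_id} failed (use_proxy={use_proxy})")

callback = partial(slack_alert_stub, use_proxy=True)
# SQLCheckOperatorWithReturnValue(..., on_failure_callback=callback, ...)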
check_avg_rows = SQLCheckOperatorWithReturnValue( - on_failure_callback=slack_alert_data_quality, + on_failure_callback=partial(slack_alert_data_quality, use_proxy=True), task_id=f"check_rows_veh_speeds", sql="select-row_count_lookback.sql", conn_id='vds_bot', @@ -145,6 +136,6 @@ def data_checks(): ) wait_for_weather_timesensor() >> check_avg_rows - [t_upstream_done, check_partitions_TG()] >> pull_vdsvehicledata() >> summarize_vdsvehicledata() >> data_checks() + check_partitions_TG() >> pull_vdsvehicledata() >> summarize_vdsvehicledata() >> data_checks() vdsvehicledata_dag() \ No newline at end of file diff --git a/dags/vz_google_sheets.py b/dags/vz_google_sheets.py index ac360a114..ab761e8c9 100644 --- a/dags/vz_google_sheets.py +++ b/dags/vz_google_sheets.py @@ -13,23 +13,23 @@ from datetime import timedelta from dateutil.parser import parse from googleapiclient.discovery import build + +from airflow.sdk import dag, task, get_current_context, Variable, Param +from airflow.exceptions import AirflowFailException from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.models import Variable -from airflow.models.param import Param from airflow.providers.google.common.hooks.base_google import GoogleBaseHook -from airflow.decorators import dag, task -from airflow.exceptions import AirflowFailException # import custom operators and helper functions repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) +from dags.dag_owners import owners # pylint: disable=wrong-import-position from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert from gis.school_safety_zones.schools import pull_from_sheet # pylint: enable=wrong-import-position DAG_NAME = 'vz_google_sheets' -DAG_OWNERS = Variable.get("dag_owners", deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) DEFAULT_ARGS = { 'owner': ','.join(DAG_OWNERS), @@ -85,7 +85,6 @@ def pull_data( AirflowFailException: If any invalid record is found. 
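In vz_google_sheets, get_current_context is now imported once from airflow.sdk alongside dag, task, Variable, and Param, rather than being imported inside the task from airflow.operators.python. Inside a running (mapped) task it returns the usual context dict, which pull_data uses to label which spreadsheet a failure came from. A minimal sketch of that access pattern, assuming nothing beyond what the hunk shows (dag id, schedule, and the sample spreadsheet dicts are illustrative):

import pendulum
from airflow.sdk import dag, task, get_current_context

@dag(dag_id="context_sketch", schedule=None,
     start_date=pendulum.datetime(2024, 1, 1, tz="America/Toronto"))
def context_sketch():
    @task
    def pull_data(spreadsheet: dict):
        context = get_current_context()
        # Stash per-mapped-task metadata so alert callbacks can report which
        # sheet a failure came from (mirrors the pattern in vz_google_sheets).
        context["task_year"] = spreadsheet["year"]
        return spreadsheet["year"]

    pull_data.expand(spreadsheet=[{"year": 2023}, {"year": 2024}])

context_sketch()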
""" #get name for mapped task - from airflow.operators.python import get_current_context context = get_current_context() context["task_year"] = spreadsheet["year"] diff --git a/dags/weather_pull.py b/dags/weather_pull.py index cc9c2973b..d2190942f 100644 --- a/dags/weather_pull.py +++ b/dags/weather_pull.py @@ -7,23 +7,19 @@ """ import os import sys -from pendulum import datetime, duration +import pendulum from datetime import timedelta, time -from airflow.decorators import dag, task +from airflow.sdk import dag, task from airflow.providers.postgres.hooks.postgres import PostgresHook -from airflow.models import Variable -from airflow.operators.latest_only import LatestOnlyOperator -from airflow.sensors.time_sensor import TimeSensor - -# DAG Information -DAG_NAME = 'weather_pull' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +from airflow.providers.standard.operators.latest_only import LatestOnlyOperator +from airflow.providers.standard.sensors.time import TimeSensor #import python scripts try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from weather.prediction_import import prediction_upsert from weather.historical_scrape import historical_upsert from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert @@ -32,11 +28,13 @@ raise ImportError("script import failed") #DAG - +DAG_NAME = 'weather_pull' +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) + default_args = { 'owner': ','.join(DAG_OWNERS), 'depends_on_past':False, - 'start_date': datetime(2024, 6, 3, tz="America/Toronto"), + 'start_date': pendulum.datetime(2024, 6, 3, tz="America/Toronto"), 'email_on_failure': False, 'email_on_success': False, 'retries': 0, @@ -77,7 +75,7 @@ def pull_prediction(): @task( retries=1, - retry_delay=duration(hours=9) #late arriving data arrives 9 hours later: https://climate.weather.gc.ca/FAQ_e.html#Q17 + retry_delay=pendulum.duration(hours=9) #late arriving data arrives 9 hours later: https://climate.weather.gc.ca/FAQ_e.html#Q17 ) def pull_historical(station_id, ds=None): historical_upsert( diff --git a/dags/wys_check.py b/dags/wys_check.py index 9de84cc18..ab0704c72 100644 --- a/dags/wys_check.py +++ b/dags/wys_check.py @@ -5,22 +5,22 @@ import os import sys import pendulum - from datetime import timedelta -from airflow.models import Variable -from airflow.decorators import dag, task_group -from airflow.sensors.external_task import ExternalTaskSensor + +from airflow.sdk import dag, task_group +from airflow.providers.standard.sensors.external_task import ExternalTaskSensor try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, slack_alert_data_quality, get_readme_docmd from bdit_dag_utils.utils.custom_operators import SQLCheckOperatorWithReturnValue except: raise ImportError("Cannot import functions to pull watch your speed data") DAG_NAME = 'wys_check' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'wys/api/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/dags/wys_monthly_summary.py b/dags/wys_monthly_summary.py new file mode 100644 index 000000000..7665b9713 --- /dev/null +++ b/dags/wys_monthly_summary.py @@ 
-0,0 +1,105 @@ +""" +Refresh WYS Materialized Views and run monthly aggregation function for Open Data. +A Slack notification is raised when the airflow process fails. +""" +import sys +import os +import pendulum +from dateutil.relativedelta import relativedelta +from datetime import datetime, timedelta + +from airflow.sdk import dag +from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator + +repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) +sys.path.insert(0, repo_path) +from dags.dag_owners import owners +from bdit_dag_utils.utils.dag_functions import task_fail_slack_alert, get_readme_docmd + +dag_name = 'wys_monthly_summary' +names = owners.get(dag_name, ['Unknown']) #find dag owners w/default = Unknown + +README_PATH = os.path.join(repo_path, 'wys/api/readme.md') +DOC_MD = get_readme_docmd(README_PATH, dag_name) + +default_args = { + 'owner': ','.join(names), + 'depends_on_past':False, + 'start_date': pendulum.datetime(2020, 4, 30, tz="America/Toronto"), + 'email_on_failure': False, + 'email_on_success': False, + 'retries': 0, + 'retry_delay': timedelta(minutes=5), + 'on_failure_callback': task_fail_slack_alert +} + +def last_month(ds): + dt = datetime.strptime(ds, "%Y-%m-%d") + # Don't need to add `months=1` because that seems to be taken care of with + # the monthly scheduling + return (dt - relativedelta(day=1)).strftime("%Y-%m-%d") + +@dag( + dag_id = dag_name, + default_args=default_args, + max_active_runs=1, + tags=["wys", "aggregation", "monthly"], + doc_md = DOC_MD, + user_defined_macros={ + 'last_month' : last_month + }, + schedule='0 3 2 * *' +) +def monthly_summary(): + wys_view_stat_signs = SQLExecuteQueryOperator( + #sql in bdit_data-sources/wys/api/sql/mat-view-stationary-signs.sql + sql='SELECT wys.refresh_mat_view_stationary_signs()', + task_id='wys_view_stat_signs', + conn_id='wys_bot', + autocommit=True, + retries = 0 + ) + + wys_view_mobile_api_id = SQLExecuteQueryOperator( + #sql in bdit_data-sources/wys/api/sql/function-refresh_mat_view_mobile_api_id.sql + #sql in bdit_data-sources/wys/api/sql/create-view-mobile_api_id.sql + sql='SELECT wys.refresh_mat_view_mobile_api_id()', + task_id='wys_view_mobile_api_id', + conn_id='wys_bot', + autocommit=True, + retries = 0 + ) + + od_wys_view = SQLExecuteQueryOperator( + #sql in bdit_data-sources/wys/api/sql/open_data/mat-view-stationary-locations.sql + sql='SELECT wys.refresh_od_mat_view()', + task_id='od_wys_view', + conn_id='wys_bot', + autocommit=True, + retries = 0 + ) + + wys_mobile_summary = SQLExecuteQueryOperator( + #sql in bdit_data-sources/wys/api/sql/function-mobile-summary.sql + sql="SELECT wys.mobile_summary_for_month('{{ last_month(ds) }}')", + task_id='wys_mobile_summary', + conn_id='wys_bot', + autocommit=True, + retries = 0 + ) + + wys_stat_summary = SQLExecuteQueryOperator( + #sql in bdit_data-sources/wys/api/sql/function-stationary-sign-summary.sql + sql="SELECT wys.stationary_summary_for_month('{{ last_month(ds) }}')", + task_id='wys_stat_summary', + conn_id='wys_bot', + autocommit=True, + retries = 0 + ) + + # Stationary signs + wys_view_stat_signs >> [wys_stat_summary, od_wys_view] + # Mobile signs + wys_view_mobile_api_id >> wys_mobile_summary + +monthly_summary() \ No newline at end of file diff --git a/dags/wys_pull.py b/dags/wys_pull.py index 9cea0be4c..3b80f9c22 100644 --- a/dags/wys_pull.py +++ b/dags/wys_pull.py @@ -9,18 +9,17 @@ from datetime import timedelta from googleapiclient.discovery import build -from 
airflow.operators.python import get_current_context +from airflow.sdk import task, dag, task_group, run_if, get_current_context +from airflow.providers.standard.sensors.external_task import ExternalTaskMarker from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.models import Variable -from airflow.decorators import task, dag, task_group, run_if -from airflow.sensors.external_task import ExternalTaskMarker from airflow.providers.google.common.hooks.base_google import GoogleBaseHook from airflow.exceptions import AirflowFailException try: repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) sys.path.insert(0, repo_path) + from dags.dag_owners import owners from wys.api.python.wys_api import ( get_schedules, agg_1hr_5kph, get_location_ids, get_api_key, get_data_for_date, update_locations @@ -33,7 +32,7 @@ raise ImportError("Cannot import functions to pull watch your speed data") DAG_NAME = 'wys_pull' -DAG_OWNERS = Variable.get('dag_owners', deserialize_json=True).get(DAG_NAME, ["Unknown"]) +DAG_OWNERS = owners.get(DAG_NAME, ["Unknown"]) README_PATH = os.path.join(repo_path, 'wys/api/readme.md') DOC_MD = get_readme_docmd(README_PATH, DAG_NAME) diff --git a/requirements.in b/requirements.in index 63fae9227..cc6314321 100644 --- a/requirements.in +++ b/requirements.in @@ -1,11 +1,11 @@ -# Airflow constraints pulled from https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-no-providers-3.10.txt +# Airflow constraints pulled from https://raw.githubusercontent.com/apache/airflow/constraints-3.0.0/constraints-no-providers-3.10.txt -c constraints.txt # Airflow -apache-airflow==2.10.2 +apache-airflow==3.0.2 # Airflow service providers -apache-airflow-providers-celery +apache-airflow-providers-standard apache-airflow-providers-google apache-airflow-providers-postgres apache-airflow-providers-slack @@ -20,7 +20,7 @@ pip-tools==7.4.1 # pipelines bs4==0.0.1 -env-canada==0.6.1 +env-canada==0.7.1 geopandas==1.0.1 holidays==0.31 requests-oauthlib @@ -30,8 +30,7 @@ ipython ipykernel # linters -pydocstyle==6.3.0 -pylint==2.17.5 +ruff # testing pytest diff --git a/requirements.txt b/requirements.txt index 71e73e276..3c781bce8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,164 +2,156 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile +# pip-compile requirements.in # -aiofiles==23.2.1 - # via gcloud-aio-storage -aiohappyeyeballs==2.4.0 +a2wsgi==1.10.8 # via # -c constraints.txt - # aiohttp -aiohttp==3.10.5 + # apache-airflow-core +aiofiles==24.1.0 + # via gcloud-aio-storage +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.12.13 # via - # -c constraints.txt + # aiohttp-cors # apache-airflow-providers-http # env-canada # gcloud-aio-auth # gcsfs -aiosignal==1.3.1 + # litellm + # ray +aiohttp-cors==0.8.1 + # via ray +aiologic==0.14.0 # via # -c constraints.txt + # apache-airflow-task-sdk +aiosignal==1.4.0 + # via # aiohttp -alembic==1.13.2 + # ray +aiosqlite==0.21.0 # via # -c constraints.txt - # apache-airflow - # sqlalchemy-spanner -amqp==5.2.0 + # apache-airflow-core +alembic==1.16.1 # via # -c constraints.txt - # kombu + # apache-airflow-core + # sqlalchemy-spanner annotated-types==0.7.0 # via # -c constraints.txt # pydantic -anyio==4.4.0 +anyio==4.9.0 # via # -c constraints.txt # httpx -apache-airflow==2.10.2 + # openai + # starlette + # watchfiles +apache-airflow==3.0.2 
# via # -r requirements.in - # apache-airflow-providers-celery # apache-airflow-providers-common-compat # apache-airflow-providers-common-io # apache-airflow-providers-common-sql - # apache-airflow-providers-fab # apache-airflow-providers-ftp # apache-airflow-providers-google # apache-airflow-providers-http - # apache-airflow-providers-imap # apache-airflow-providers-postgres # apache-airflow-providers-slack # apache-airflow-providers-smtp - # apache-airflow-providers-sqlite -apache-airflow-providers-celery==3.8.3 - # via -r requirements.in -apache-airflow-providers-common-compat==1.2.1 + # apache-airflow-providers-standard +apache-airflow-core==3.0.2 # via # apache-airflow - # apache-airflow-providers-fab + # apache-airflow-task-sdk +apache-airflow-providers-common-compat==1.7.2 + # via + # apache-airflow-core # apache-airflow-providers-google -apache-airflow-providers-common-io==1.4.2 - # via apache-airflow -apache-airflow-providers-common-sql==1.18.0 + # apache-airflow-providers-slack + # apache-airflow-providers-smtp +apache-airflow-providers-common-io==1.6.1 + # via apache-airflow-core +apache-airflow-providers-common-sql==1.27.3 # via - # apache-airflow + # apache-airflow-core # apache-airflow-providers-google # apache-airflow-providers-postgres # apache-airflow-providers-slack - # apache-airflow-providers-sqlite -apache-airflow-providers-fab==1.4.1 - # via apache-airflow -apache-airflow-providers-ftp==3.11.1 - # via - # -r requirements.in - # apache-airflow -apache-airflow-providers-google==10.24.0 +apache-airflow-providers-ftp==3.13.1 # via -r requirements.in -apache-airflow-providers-http==4.13.1 - # via - # -r requirements.in - # apache-airflow -apache-airflow-providers-imap==3.7.0 - # via apache-airflow -apache-airflow-providers-postgres==5.13.1 +apache-airflow-providers-google==16.1.0 + # via -r requirements.in +apache-airflow-providers-http==5.3.2 # via -r requirements.in -apache-airflow-providers-slack==8.9.0 +apache-airflow-providers-postgres==6.2.1 # via -r requirements.in -apache-airflow-providers-smtp==1.8.0 - # via apache-airflow -apache-airflow-providers-sqlite==3.9.0 - # via apache-airflow -apispec[yaml]==6.6.1 +apache-airflow-providers-slack==9.1.2 + # via -r requirements.in +apache-airflow-providers-smtp==2.1.1 + # via apache-airflow-core +apache-airflow-providers-standard==1.4.0 # via - # -c constraints.txt - # flask-appbuilder -argcomplete==3.5.0 + # -r requirements.in + # apache-airflow-core +apache-airflow-task-sdk==1.0.2 # via - # -c constraints.txt # apache-airflow + # apache-airflow-core +argcomplete==3.6.2 + # via + # -c constraints.txt + # apache-airflow-core asgiref==3.8.1 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # apache-airflow-providers-google # apache-airflow-providers-http -astroid==2.15.8 - # via pylint -asttokens==2.4.1 +asttokens==3.0.0 # via stack-data -async-timeout==4.0.3 +async-timeout==5.0.1 # via - # -c constraints.txt # aiohttp - # redis -attrs==24.2.0 + # asyncpg +asyncpg==0.30.0 + # via apache-airflow-providers-postgres +attrs==25.3.0 # via # -c constraints.txt # aiohttp - # apache-airflow + # apache-airflow-core + # apache-airflow-task-sdk # cattrs # jsonschema # looker-sdk # referencing -babel==2.16.0 - # via - # -c constraints.txt - # flask-babel + # svcs backoff==2.2.1 + # via gcloud-aio-auth +backports-strenum==1.3.1 # via # -c constraints.txt - # gcloud-aio-auth - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -beautifulsoup4==4.12.3 + # cadwyn +beautifulsoup4==4.13.4 # 
via bs4 -billiard==4.2.1 - # via celery -blinker==1.8.2 - # via - # -c constraints.txt - # apache-airflow bs4==0.0.1 # via -r requirements.in build==1.2.2.post1 # via pip-tools -cachelib==0.9.0 +cachetools==5.5.2 + # via google-auth +cadwyn==5.3.3 # via # -c constraints.txt - # flask-caching - # flask-session -cachetools==5.5.0 - # via google-auth -cattrs==24.1.2 + # apache-airflow-core +cattrs==25.1.1 # via looker-sdk -celery[redis]==5.4.0 - # via - # apache-airflow-providers-celery - # flower -certifi==2024.8.30 +certifi==2025.4.26 # via # -c constraints.txt # httpcore @@ -173,187 +165,130 @@ cffi==1.17.1 # cryptography chardet==5.2.0 # via gcloud-aio-auth -charset-normalizer==3.3.2 +charset-normalizer==3.4.2 # via # -c constraints.txt # requests -click==8.1.7 +click==8.1.8 # via # -c constraints.txt - # celery - # click-didyoumean - # click-plugins - # click-repl - # clickclick # flask - # flask-appbuilder + # litellm # pip-tools -click-didyoumean==0.3.1 - # via celery -click-plugins==1.1.1 - # via celery -click-repl==0.3.0 - # via celery -clickclick==20.10.2 - # via - # -c constraints.txt - # connexion -colorama==0.4.6 - # via - # -c constraints.txt - # flask-appbuilder -colorlog==6.8.2 + # ray + # rich-toolkit + # typer + # uvicorn +colorful==0.5.7 + # via ray +colorlog==6.9.0 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core comm==0.2.2 # via ipykernel -configupdater==3.2 - # via - # -c constraints.txt - # apache-airflow -connexion[flask]==2.14.2 - # via - # -c constraints.txt - # apache-airflow cron-descriptor==1.4.5 # via # -c constraints.txt - # apache-airflow -croniter==3.0.3 + # apache-airflow-core +croniter==6.0.0 # via # -c constraints.txt - # apache-airflow -cryptography==43.0.1 + # apache-airflow-core +cryptography==42.0.8 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # gcloud-aio-auth # pyopenssl -db-dtypes==1.3.0 +db-dtypes==1.4.3 # via pandas-gbq -debugpy==1.8.7 +debugpy==1.8.14 # via ipykernel -decorator==5.1.1 +decorator==5.2.1 # via - # -c constraints.txt # gcsfs # ipython defusedxml==0.7.1 # via env-canada -deprecated==1.2.14 +deprecated==1.2.18 # via # -c constraints.txt - # apache-airflow - # limits + # apache-airflow-core # opentelemetry-api -dill==0.3.8 + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions +dill==0.3.1.1 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # apache-airflow-providers-google - # pylint -dnspython==2.6.1 +distlib==0.3.9 + # via virtualenv +distro==1.9.0 + # via openai +dnspython==2.7.0 # via # -c constraints.txt # email-validator docstring-parser==0.16 # via google-cloud-aiplatform -docutils==0.21.2 - # via - # -c constraints.txt - # python-daemon email-validator==2.2.0 # via # -c constraints.txt - # flask-appbuilder -env-canada==0.6.1 + # fastapi +env-canada==0.7.1 # via -r requirements.in -exceptiongroup==1.2.2 +exceptiongroup==1.3.0 # via # -c constraints.txt # anyio # cattrs # ipython # pytest -executing==2.1.0 +executing==2.2.0 # via stack-data -flask==2.2.5 - # via - # -c constraints.txt - # apache-airflow - # apache-airflow-providers-fab - # connexion - # flask-appbuilder - # flask-babel - # flask-caching - # flask-jwt-extended - # flask-limiter - # flask-login - # flask-session - # flask-sqlalchemy - # flask-wtf -flask-appbuilder==4.5.0 - # via - # -c constraints.txt - # apache-airflow-providers-fab -flask-babel==2.0.0 - # via - # -c constraints.txt - # flask-appbuilder -flask-caching==2.3.0 - # 
via - # -c constraints.txt - # apache-airflow -flask-jwt-extended==4.6.0 - # via - # -c constraints.txt - # flask-appbuilder -flask-limiter==3.8.0 +fastapi[standard]==0.115.12 # via # -c constraints.txt - # flask-appbuilder -flask-login==0.6.3 + # apache-airflow-core + # cadwyn +fastapi-cli[standard]==0.0.7 # via # -c constraints.txt - # apache-airflow-providers-fab - # flask-appbuilder -flask-session==0.5.0 + # fastapi +filelock==3.18.0 # via - # -c constraints.txt - # apache-airflow -flask-sqlalchemy==2.5.1 - # via - # -c constraints.txt - # flask-appbuilder -flask-wtf==1.2.1 + # huggingface-hub + # ray + # virtualenv +flask==2.2.5 # via # -c constraints.txt - # apache-airflow - # flask-appbuilder -flower==2.0.1 - # via apache-airflow-providers-celery -frozenlist==1.4.1 + # apache-airflow-core +frozenlist==1.7.0 # via - # -c constraints.txt # aiohttp # aiosignal -fsspec==2024.9.0 + # ray +fsspec==2025.5.1 # via # -c constraints.txt - # apache-airflow + # apache-airflow-task-sdk # gcsfs + # huggingface-hub # universal-pathlib -gcloud-aio-auth==5.3.2 +gcloud-aio-auth==5.4.2 # via # apache-airflow-providers-google # gcloud-aio-bigquery # gcloud-aio-storage gcloud-aio-bigquery==7.1.0 # via apache-airflow-providers-google -gcloud-aio-storage==9.3.0 +gcloud-aio-storage==9.5.0 # via apache-airflow-providers-google -gcsfs==2024.9.0.post1 +gcsfs==2025.5.1 # via apache-airflow-providers-google geographiclib==2.0 # via geopy @@ -361,22 +296,24 @@ geopandas==1.0.1 # via -r requirements.in geopy==2.4.1 # via env-canada -google-ads==25.0.0 +google-ads==27.0.0 # via apache-airflow-providers-google -google-analytics-admin==0.23.0 +google-analytics-admin==0.24.1 # via apache-airflow-providers-google -google-api-core[grpc]==2.21.0 +google-api-core[grpc]==2.25.1 # via # apache-airflow-providers-google # google-ads # google-analytics-admin # google-api-python-client # google-cloud-aiplatform + # google-cloud-alloydb # google-cloud-appengine-logging # google-cloud-automl # google-cloud-batch # google-cloud-bigquery # google-cloud-bigquery-datatransfer + # google-cloud-bigquery-storage # google-cloud-bigtable # google-cloud-build # google-cloud-compute @@ -392,6 +329,7 @@ google-api-core[grpc]==2.21.0 # google-cloud-kms # google-cloud-language # google-cloud-logging + # google-cloud-managedkafka # google-cloud-memcache # google-cloud-monitoring # google-cloud-orchestration-airflow @@ -411,11 +349,12 @@ google-api-core[grpc]==2.21.0 # google-cloud-videointelligence # google-cloud-vision # google-cloud-workflows + # opencensus # pandas-gbq # sqlalchemy-bigquery -google-api-python-client==2.149.0 +google-api-python-client==2.175.0 # via apache-airflow-providers-google -google-auth==2.35.0 +google-auth==2.40.3 # via # apache-airflow-providers-google # gcsfs @@ -425,11 +364,13 @@ google-auth==2.35.0 # google-auth-httplib2 # google-auth-oauthlib # google-cloud-aiplatform + # google-cloud-alloydb # google-cloud-appengine-logging # google-cloud-automl # google-cloud-batch # google-cloud-bigquery # google-cloud-bigquery-datatransfer + # google-cloud-bigquery-storage # google-cloud-bigtable # google-cloud-build # google-cloud-compute @@ -445,6 +386,7 @@ google-auth==2.35.0 # google-cloud-kms # google-cloud-language # google-cloud-logging + # google-cloud-managedkafka # google-cloud-memcache # google-cloud-monitoring # google-cloud-orchestration-airflow @@ -463,6 +405,7 @@ google-auth==2.35.0 # google-cloud-videointelligence # google-cloud-vision # google-cloud-workflows + # google-genai # pandas-gbq # 
pydata-google-auth # sqlalchemy-bigquery @@ -470,39 +413,43 @@ google-auth-httplib2==0.2.0 # via # apache-airflow-providers-google # google-api-python-client -google-auth-oauthlib==1.2.1 +google-auth-oauthlib==1.2.2 # via # gcsfs # google-ads # pandas-gbq # pydata-google-auth -google-cloud-aiplatform==1.70.0 +google-cloud-aiplatform[evaluation,ray]==1.101.0 # via apache-airflow-providers-google -google-cloud-appengine-logging==1.4.5 +google-cloud-alloydb==0.4.8 + # via apache-airflow-providers-google +google-cloud-appengine-logging==1.6.2 # via google-cloud-logging -google-cloud-audit-log==0.3.0 +google-cloud-audit-log==0.3.2 # via google-cloud-logging -google-cloud-automl==2.13.5 +google-cloud-automl==2.16.4 # via apache-airflow-providers-google -google-cloud-batch==0.17.29 +google-cloud-batch==0.17.36 # via apache-airflow-providers-google -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.34.0 # via # apache-airflow-providers-google # google-cloud-aiplatform # pandas-gbq # sqlalchemy-bigquery -google-cloud-bigquery-datatransfer==3.16.0 +google-cloud-bigquery-datatransfer==3.19.2 # via apache-airflow-providers-google -google-cloud-bigtable==2.26.0 +google-cloud-bigquery-storage==2.32.0 + # via google-cloud-aiplatform +google-cloud-bigtable==2.31.0 # via apache-airflow-providers-google -google-cloud-build==3.25.0 +google-cloud-build==3.31.2 # via apache-airflow-providers-google -google-cloud-compute==1.19.2 +google-cloud-compute==1.31.0 # via apache-airflow-providers-google -google-cloud-container==2.52.0 +google-cloud-container==2.57.0 # via apache-airflow-providers-google -google-cloud-core==2.4.1 +google-cloud-core==2.4.3 # via # google-cloud-bigquery # google-cloud-bigtable @@ -510,84 +457,83 @@ google-cloud-core==2.4.1 # google-cloud-spanner # google-cloud-storage # google-cloud-translate -google-cloud-datacatalog==3.20.1 +google-cloud-datacatalog==3.27.1 # via apache-airflow-providers-google -google-cloud-dataflow-client==0.8.12 +google-cloud-dataflow-client==0.9.0 # via apache-airflow-providers-google -google-cloud-dataform==0.5.11 +google-cloud-dataform==0.6.2 # via apache-airflow-providers-google -google-cloud-dataplex==2.2.2 +google-cloud-dataplex==2.10.2 # via apache-airflow-providers-google -google-cloud-dataproc==5.13.0 +google-cloud-dataproc==5.21.0 # via apache-airflow-providers-google -google-cloud-dataproc-metastore==1.15.5 +google-cloud-dataproc-metastore==1.18.3 # via apache-airflow-providers-google -google-cloud-dlp==3.23.0 +google-cloud-dlp==3.31.0 # via apache-airflow-providers-google -google-cloud-kms==3.0.0 +google-cloud-kms==3.5.1 # via apache-airflow-providers-google -google-cloud-language==2.14.0 +google-cloud-language==2.17.2 # via apache-airflow-providers-google -google-cloud-logging==3.11.2 +google-cloud-logging==3.12.1 # via apache-airflow-providers-google -google-cloud-memcache==1.9.5 +google-cloud-managedkafka==0.1.11 # via apache-airflow-providers-google -google-cloud-monitoring==2.22.2 +google-cloud-memcache==1.12.2 # via apache-airflow-providers-google -google-cloud-orchestration-airflow==1.14.0 +google-cloud-monitoring==2.27.2 # via apache-airflow-providers-google -google-cloud-os-login==2.14.6 +google-cloud-orchestration-airflow==1.17.5 # via apache-airflow-providers-google -google-cloud-pubsub==2.25.0 +google-cloud-os-login==2.17.2 # via apache-airflow-providers-google -google-cloud-redis==2.15.5 +google-cloud-pubsub==2.30.0 # via apache-airflow-providers-google -google-cloud-resource-manager==1.12.5 +google-cloud-redis==2.18.1 + # via 
apache-airflow-providers-google +google-cloud-resource-manager==1.14.2 # via google-cloud-aiplatform -google-cloud-run==0.10.9 +google-cloud-run==0.10.18 # via apache-airflow-providers-google -google-cloud-secret-manager==2.20.2 +google-cloud-secret-manager==2.24.0 # via apache-airflow-providers-google -google-cloud-spanner==3.49.1 +google-cloud-spanner==3.55.0 # via # apache-airflow-providers-google # sqlalchemy-spanner -google-cloud-speech==2.27.0 +google-cloud-speech==2.33.0 # via apache-airflow-providers-google -google-cloud-storage==2.18.2 +google-cloud-storage==2.19.0 # via # apache-airflow-providers-google # gcsfs # google-cloud-aiplatform -google-cloud-storage-transfer==1.12.0 +google-cloud-storage-transfer==1.17.0 # via apache-airflow-providers-google -google-cloud-tasks==2.16.5 +google-cloud-tasks==2.19.3 # via apache-airflow-providers-google -google-cloud-texttospeech==2.18.0 +google-cloud-texttospeech==2.27.0 # via apache-airflow-providers-google -google-cloud-translate==3.16.0 +google-cloud-translate==3.21.1 # via apache-airflow-providers-google -google-cloud-videointelligence==2.13.5 +google-cloud-videointelligence==2.16.2 # via apache-airflow-providers-google -google-cloud-vision==3.7.4 +google-cloud-vision==3.10.2 # via apache-airflow-providers-google -google-cloud-workflows==1.14.5 +google-cloud-workflows==1.18.2 # via apache-airflow-providers-google -google-crc32c==1.6.0 +google-crc32c==1.7.1 # via + # google-cloud-bigtable # google-cloud-storage # google-resumable-media -google-re2==1.1.20240702 - # via - # -c constraints.txt - # apache-airflow - # apache-airflow-providers-celery - # apache-airflow-providers-fab +google-genai==1.2.0 + # via google-cloud-aiplatform google-resumable-media==2.7.2 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos[grpc]==1.65.0 +googleapis-common-protos[grpc]==1.70.0 # via # -c constraints.txt # google-ads @@ -597,16 +543,15 @@ googleapis-common-protos[grpc]==1.65.0 # grpcio-status # opentelemetry-exporter-otlp-proto-grpc # opentelemetry-exporter-otlp-proto-http -graphviz==0.20.3 - # via - # -c constraints.txt - # -r requirements.in -greenlet==3.1.0 +graphviz==0.21 + # via -r requirements.in +greenlet==3.2.2 # via # -c constraints.txt # sqlalchemy -grpc-google-iam-v1==0.13.1 +grpc-google-iam-v1==0.14.2 # via + # google-cloud-alloydb # google-cloud-bigtable # google-cloud-build # google-cloud-datacatalog @@ -625,7 +570,7 @@ grpc-google-iam-v1==0.13.1 # google-cloud-translate grpc-interceptor==0.15.4 # via google-cloud-spanner -grpcio==1.66.1 +grpcio==1.72.1 # via # -c constraints.txt # google-ads @@ -637,6 +582,7 @@ grpcio==1.66.1 # grpcio-gcp # grpcio-status # opentelemetry-exporter-otlp-proto-grpc + # ray grpcio-gcp==0.2.2 # via apache-airflow-providers-google grpcio-status==1.62.3 @@ -647,14 +593,17 @@ grpcio-status==1.62.3 gunicorn==23.0.0 # via # -c constraints.txt - # apache-airflow -h11==0.14.0 + # apache-airflow-core +h11==0.16.0 # via # -c constraints.txt # httpcore + # uvicorn +hf-xet==1.1.5 + # via huggingface-hub holidays==0.31 # via -r requirements.in -httpcore==1.0.5 +httpcore==1.0.9 # via # -c constraints.txt # httpx @@ -662,13 +611,22 @@ httplib2==0.22.0 # via # google-api-python-client # google-auth-httplib2 -httpx==0.27.2 +httptools==0.6.4 # via # -c constraints.txt - # apache-airflow + # uvicorn +httpx==0.27.0 + # via + # -c constraints.txt + # apache-airflow-core # apache-airflow-providers-google -humanize==4.11.0 - # via flower + # apache-airflow-task-sdk + # fastapi + # litellm + # openai + # 
retryhttp +huggingface-hub==0.33.2 + # via tokenizers idna==3.10 # via # -c constraints.txt @@ -677,138 +635,104 @@ idna==3.10 # httpx # requests # yarl -imageio==2.36.0 +imageio==2.37.0 # via env-canada -immutabledict==4.2.0 - # via apache-airflow-providers-google -importlib-metadata==8.5.0 +immutabledict==4.2.1 # via - # -c constraints.txt - # apache-airflow -importlib-resources==6.4.5 - # via - # -c constraints.txt - # limits -inflection==0.5.1 + # apache-airflow-providers-google + # google-cloud-aiplatform +importlib-metadata==8.4.0 # via # -c constraints.txt - # connexion -iniconfig==2.0.0 + # apache-airflow-core + # litellm + # opentelemetry-api +iniconfig==2.1.0 # via pytest ipykernel==6.29.5 # via -r requirements.in -ipython==8.28.0 +ipython==8.37.0 # via # -r requirements.in # ipykernel -isort==5.13.2 - # via pylint itsdangerous==2.2.0 # via # -c constraints.txt - # apache-airflow - # connexion + # apache-airflow-core # flask - # flask-wtf -jedi==0.19.1 +jedi==0.19.2 # via ipython -jinja2==3.1.4 +jinja2==3.1.6 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core + # apache-airflow-task-sdk + # cadwyn + # fastapi # flask - # flask-babel - # python-nvd3 -jmespath==1.0.1 + # litellm +jiter==0.10.0 + # via openai +joblib==1.5.1 + # via scikit-learn +jsonschema==4.24.0 # via # -c constraints.txt - # apache-airflow-providers-fab -json-merge-patch==0.2 - # via apache-airflow-providers-google -jsonschema==4.23.0 - # via - # -c constraints.txt - # apache-airflow - # connexion - # flask-appbuilder -jsonschema-specifications==2023.12.1 + # apache-airflow-core + # google-cloud-aiplatform + # litellm + # ray +jsonschema-specifications==2025.4.1 # via # -c constraints.txt # jsonschema jupyter-client==8.6.3 # via ipykernel -jupyter-core==5.7.2 +jupyter-core==5.8.1 # via # ipykernel # jupyter-client -kombu==5.4.2 - # via celery -lazy-object-proxy==1.10.0 +lazy-object-proxy==1.11.0 # via # -c constraints.txt - # apache-airflow - # astroid -limits==3.13.0 + # apache-airflow-core +libcst==1.8.0 # via # -c constraints.txt - # flask-limiter + # apache-airflow-core linkify-it-py==2.0.3 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core +litellm==1.74.0.post1 + # via google-cloud-aiplatform lockfile==0.12.2 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # python-daemon -looker-sdk==24.18.0 +looker-sdk==25.10.0 # via apache-airflow-providers-google -lxml==5.3.0 - # via - # -c constraints.txt - # env-canada -mako==1.3.5 +lxml==6.0.0 + # via env-canada +mako==1.3.10 # via # -c constraints.txt # alembic markdown-it-py==3.0.0 # via # -c constraints.txt - # apache-airflow - # mdit-py-plugins # rich -markupsafe==2.1.5 +markupsafe==3.0.2 # via # -c constraints.txt - # apache-airflow # jinja2 # mako # werkzeug - # wtforms -marshmallow==3.22.0 - # via - # -c constraints.txt - # flask-appbuilder - # marshmallow-oneofschema - # marshmallow-sqlalchemy -marshmallow-oneofschema==3.1.1 - # via - # -c constraints.txt - # apache-airflow -marshmallow-sqlalchemy==0.28.2 - # via - # -c constraints.txt - # flask-appbuilder matplotlib-inline==0.1.7 # via # ipykernel # ipython -mccabe==0.7.0 - # via pylint -mdit-py-plugins==0.4.2 - # via - # -c constraints.txt - # apache-airflow mdurl==0.1.2 # via # -c constraints.txt @@ -816,153 +740,163 @@ mdurl==0.1.2 methodtools==0.4.7 # via # -c constraints.txt - # apache-airflow -more-itertools==10.5.0 + # apache-airflow-core + # apache-airflow-providers-common-sql + # apache-airflow-task-sdk +more-itertools==10.7.0 # via # -c 
constraints.txt # apache-airflow-providers-common-sql -multidict==6.1.0 +msgpack==1.1.1 + # via ray +msgspec==0.19.0 # via # -c constraints.txt + # apache-airflow-task-sdk +multidict==6.6.3 + # via # aiohttp # yarl nest-asyncio==1.6.0 # via ipykernel -numpy==1.26.4 +numpy==2.2.6 # via - # -c constraints.txt # db-dtypes # env-canada # geopandas # imageio # pandas # pandas-gbq - # pyarrow # pyogrio + # scikit-learn + # scipy # shapely -oauthlib==3.2.2 +oauthlib==3.3.1 # via requests-oauthlib -opentelemetry-api==1.16.0 +openai==1.93.1 + # via litellm +opencensus==0.11.4 + # via ray +opencensus-context==0.1.3 + # via opencensus +opentelemetry-api==1.27.0 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # google-cloud-logging # google-cloud-pubsub # opentelemetry-exporter-otlp-proto-grpc # opentelemetry-exporter-otlp-proto-http # opentelemetry-sdk -opentelemetry-exporter-otlp==1.16.0 + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.27.0 # via # -c constraints.txt - # apache-airflow -opentelemetry-exporter-otlp-proto-grpc==1.16.0 + # apache-airflow-core +opentelemetry-exporter-otlp-proto-common==1.27.0 + # via + # -c constraints.txt + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.27.0 # via # -c constraints.txt # opentelemetry-exporter-otlp -opentelemetry-exporter-otlp-proto-http==1.16.0 +opentelemetry-exporter-otlp-proto-http==1.27.0 # via # -c constraints.txt # opentelemetry-exporter-otlp -opentelemetry-proto==1.16.0 +opentelemetry-proto==1.27.0 # via # -c constraints.txt + # opentelemetry-exporter-otlp-proto-common # opentelemetry-exporter-otlp-proto-grpc # opentelemetry-exporter-otlp-proto-http -opentelemetry-sdk==1.16.0 +opentelemetry-sdk==1.27.0 # via # -c constraints.txt # google-cloud-pubsub # opentelemetry-exporter-otlp-proto-grpc # opentelemetry-exporter-otlp-proto-http -opentelemetry-semantic-conventions==0.37b0 +opentelemetry-semantic-conventions==0.48b0 # via # -c constraints.txt # opentelemetry-sdk -ordered-set==4.1.0 +packaging==24.2 # via # -c constraints.txt - # flask-limiter -packaging==24.1 - # via - # -c constraints.txt - # apache-airflow - # apispec + # apache-airflow-core # build - # connexion # db-dtypes # geopandas # google-cloud-aiplatform # google-cloud-bigquery # gunicorn + # huggingface-hub # ipykernel - # limits - # marshmallow - # marshmallow-sqlalchemy # pandas-gbq # pyogrio # pytest + # ray # sqlalchemy-bigquery -pandas==2.1.4 +pandas==2.3.1 # via - # -c constraints.txt - # apache-airflow-providers-google # db-dtypes # env-canada # geopandas + # google-cloud-aiplatform # pandas-gbq -pandas-gbq==0.24.0 +pandas-gbq==0.29.1 # via apache-airflow-providers-google parso==0.8.4 # via jedi pathspec==0.12.1 # via # -c constraints.txt - # apache-airflow -pendulum==3.0.0 + # apache-airflow-core +pendulum==3.1.0 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core + # apache-airflow-task-sdk pexpect==4.9.0 # via ipython -pillow==11.0.0 +pillow==11.3.0 # via # env-canada # imageio pip-tools==7.4.1 # via -r requirements.in -platformdirs==4.3.3 +platformdirs==4.3.8 # via - # -c constraints.txt # jupyter-core - # pylint -pluggy==1.5.0 + # virtualenv +pluggy==1.6.0 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # pytest -prison==0.2.1 - # via - # -c constraints.txt - # flask-appbuilder -prometheus-client==0.20.0 - # via - # -c constraints.txt - # flower -prompt-toolkit==3.0.48 +prometheus-client==0.22.1 + # via ray 
+prompt-toolkit==3.0.51 + # via ipython +propcache==0.3.2 # via - # click-repl - # ipython -proto-plus==1.24.0 + # aiohttp + # yarl +proto-plus==1.26.1 # via # apache-airflow-providers-google # google-ads # google-analytics-admin # google-api-core # google-cloud-aiplatform + # google-cloud-alloydb # google-cloud-appengine-logging # google-cloud-automl # google-cloud-batch # google-cloud-bigquery-datatransfer + # google-cloud-bigquery-storage # google-cloud-bigtable # google-cloud-build # google-cloud-compute @@ -977,6 +911,7 @@ proto-plus==1.24.0 # google-cloud-kms # google-cloud-language # google-cloud-logging + # google-cloud-managedkafka # google-cloud-memcache # google-cloud-monitoring # google-cloud-orchestration-airflow @@ -995,18 +930,20 @@ proto-plus==1.24.0 # google-cloud-videointelligence # google-cloud-vision # google-cloud-workflows -protobuf==4.25.4 +protobuf==4.25.8 # via # -c constraints.txt # google-ads # google-analytics-admin # google-api-core # google-cloud-aiplatform + # google-cloud-alloydb # google-cloud-appengine-logging # google-cloud-audit-log # google-cloud-automl # google-cloud-batch # google-cloud-bigquery-datatransfer + # google-cloud-bigquery-storage # google-cloud-bigtable # google-cloud-build # google-cloud-compute @@ -1021,6 +958,7 @@ protobuf==4.25.4 # google-cloud-kms # google-cloud-language # google-cloud-logging + # google-cloud-managedkafka # google-cloud-memcache # google-cloud-monitoring # google-cloud-orchestration-airflow @@ -1044,271 +982,317 @@ protobuf==4.25.4 # grpcio-status # opentelemetry-proto # proto-plus -psutil==6.0.0 + # ray +psutil==7.0.0 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core + # apache-airflow-task-sdk # ipykernel -psycopg2-binary==2.9.9 +psycopg2-binary==2.9.10 # via apache-airflow-providers-postgres ptyprocess==0.7.0 # via pexpect pure-eval==0.2.3 # via stack-data -pyarrow==17.0.0 +py-spy==0.4.0 + # via ray +pyarrow==20.0.0 # via + # apache-airflow-providers-google # db-dtypes + # google-cloud-aiplatform # pandas-gbq pyasn1==0.6.1 # via - # -c constraints.txt # pyasn1-modules # rsa -pyasn1-modules==0.4.0 +pyasn1-modules==0.4.1 # via - # -c constraints.txt # gcloud-aio-storage # google-auth pycparser==2.22 # via # -c constraints.txt # cffi -pydantic==2.9.1 +pydantic==2.11.5 # via # -c constraints.txt + # apache-airflow-core + # cadwyn + # fastapi # google-cloud-aiplatform -pydantic-core==2.23.3 + # google-genai + # litellm + # openai + # ray + # retryhttp +pydantic-core==2.33.2 # via # -c constraints.txt # pydantic -pydata-google-auth==1.8.2 +pydata-google-auth==1.9.1 # via pandas-gbq -pydocstyle==6.3.0 - # via -r requirements.in -pygments==2.18.0 +pygments==2.19.1 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # ipython + # pytest # rich -pyjwt==2.9.0 +pyjwt==2.10.1 # via # -c constraints.txt - # apache-airflow - # flask-appbuilder - # flask-jwt-extended + # apache-airflow-core # gcloud-aio-auth -pylint==2.17.5 - # via -r requirements.in -pyogrio==0.10.0 +pyogrio==0.11.0 # via geopandas -pyopenssl==24.2.1 +pyopenssl==25.1.0 # via apache-airflow-providers-google -pyparsing==3.2.0 +pyparsing==3.2.3 # via httplib2 -pyproj==3.7.0 +pyproj==3.7.1 # via geopandas pyproject-hooks==1.2.0 # via # build # pip-tools -pytest==8.3.3 +pytest==8.4.1 # via # -r requirements.in # pytest-mock -pytest-mock==3.14.0 +pytest-mock==3.14.1 # via -r requirements.in -python-daemon==3.0.1 +python-daemon==3.1.2 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core python-dateutil==2.9.0.post0 
# via # -c constraints.txt - # apache-airflow - # celery + # apache-airflow-core + # apache-airflow-task-sdk # croniter # env-canada - # flask-appbuilder # google-cloud-bigquery # holidays # jupyter-client # pandas # pendulum - # time-machine -python-nvd3==0.16.0 +python-dotenv==1.1.0 # via # -c constraints.txt - # apache-airflow + # litellm + # uvicorn +python-multipart==0.0.20 + # via + # -c constraints.txt + # fastapi python-slugify==8.0.4 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # apache-airflow-providers-google - # python-nvd3 -pytz==2024.2 +pytz==2025.2 # via # -c constraints.txt # croniter - # flask-babel - # flower # pandas pyyaml==6.0.2 # via # -c constraints.txt - # apispec - # clickclick - # connexion # google-ads -pyzmq==26.2.0 + # google-cloud-aiplatform + # huggingface-hub + # libcst + # ray + # uvicorn +pyzmq==27.0.0 # via # ipykernel # jupyter-client -redis==5.1.1 - # via celery -referencing==0.35.1 +ray[default]==2.42.0 + # via google-cloud-aiplatform +referencing==0.36.2 # via # -c constraints.txt # jsonschema # jsonschema-specifications +regex==2024.11.6 + # via tiktoken requests==2.32.3 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # apache-airflow-providers-http - # connexion # gcsfs # google-api-core # google-cloud-bigquery # google-cloud-storage + # google-genai + # huggingface-hub # looker-sdk # opentelemetry-exporter-otlp-proto-http + # ray # requests-oauthlib # requests-toolbelt + # retryhttp + # tiktoken requests-oauthlib==2.0.0 # via # -r requirements.in # google-auth-oauthlib requests-toolbelt==1.0.0 + # via apache-airflow-providers-http +retryhttp==1.3.3 # via # -c constraints.txt - # apache-airflow - # apache-airflow-providers-http -rfc3339-validator==0.1.4 + # apache-airflow-task-sdk +rich==13.9.4 # via # -c constraints.txt - # apache-airflow -rich==13.8.1 + # apache-airflow-core + # rich-argparse + # rich-toolkit + # typer +rich-argparse==1.7.1 # via # -c constraints.txt - # apache-airflow - # flask-limiter - # rich-argparse -rich-argparse==1.5.2 + # apache-airflow-core +rich-toolkit==0.14.7 # via # -c constraints.txt - # apache-airflow -rpds-py==0.20.0 + # fastapi-cli +rpds-py==0.25.1 # via # -c constraints.txt # jsonschema # referencing -rsa==4.9 +rsa==4.9.1 # via # gcloud-aio-storage # google-auth -setproctitle==1.3.3 +ruamel-yaml==0.18.14 + # via google-cloud-aiplatform +ruamel-yaml-clib==0.2.12 + # via ruamel-yaml +ruff==0.12.2 + # via -r requirements.in +scikit-learn==1.5.2 + # via google-cloud-aiplatform +scipy==1.15.3 + # via scikit-learn +setproctitle==1.3.6 # via # -c constraints.txt - # apache-airflow -shapely==2.0.6 + # apache-airflow-core +shapely==2.1.1 # via # geopandas # google-cloud-aiplatform -six==1.16.0 +shellingham==1.5.4 # via # -c constraints.txt - # asttokens - # prison + # typer +six==1.17.0 + # via + # -c constraints.txt + # opencensus # python-dateutil - # rfc3339-validator # wirerope -slack-sdk==3.33.1 +slack-sdk==3.35.0 # via apache-airflow-providers-slack +smart-open==7.3.0.post1 + # via ray sniffio==1.3.1 # via # -c constraints.txt # anyio # httpx -snowballstemmer==2.2.0 - # via pydocstyle -soupsieve==2.6 + # openai +soupsieve==2.7 # via beautifulsoup4 -sqlalchemy==1.4.54 +sqlalchemy[asyncio]==1.4.54 # via # -c constraints.txt # alembic - # apache-airflow - # flask-appbuilder - # flask-sqlalchemy - # marshmallow-sqlalchemy + # apache-airflow-core # sqlalchemy-bigquery # sqlalchemy-jsonfield # sqlalchemy-spanner # sqlalchemy-utils -sqlalchemy-bigquery==1.12.0 
+sqlalchemy-bigquery==1.15.0 # via apache-airflow-providers-google sqlalchemy-jsonfield==1.0.2 # via # -c constraints.txt - # apache-airflow -sqlalchemy-spanner==1.7.0 + # apache-airflow-core +sqlalchemy-spanner==1.14.0 # via apache-airflow-providers-google sqlalchemy-utils==0.41.2 # via # -c constraints.txt - # flask-appbuilder -sqlparse==0.5.1 + # apache-airflow-core +sqlparse==0.5.3 # via # -c constraints.txt # apache-airflow-providers-common-sql # google-cloud-spanner stack-data==0.6.3 # via ipython +starlette==0.46.2 + # via + # -c constraints.txt + # cadwyn + # fastapi +structlog==25.4.0 + # via + # -c constraints.txt + # apache-airflow-task-sdk +svcs==25.1.0 + # via + # -c constraints.txt + # apache-airflow-core tabulate==0.9.0 # via # -c constraints.txt - # apache-airflow -tenacity==9.0.0 + # apache-airflow-core +tenacity==9.1.2 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core # apache-airflow-providers-google -termcolor==2.4.0 + # retryhttp +termcolor==3.1.0 # via # -c constraints.txt - # apache-airflow + # apache-airflow-core text-unidecode==1.3 # via # -c constraints.txt # python-slugify -time-machine==2.15.0 +threadpoolctl==3.6.0 + # via scikit-learn +tiktoken==0.9.0 + # via litellm +tokenizers==0.21.2 + # via litellm +tomli==2.2.1 # via # -c constraints.txt - # pendulum -tomli==2.0.2 - # via + # alembic # build # pip-tools - # pylint # pytest -tomlkit==0.13.2 - # via pylint -tornado==6.4.1 +tornado==6.5.1 # via - # flower # ipykernel # jupyter-client +tqdm==4.67.1 + # via + # google-cloud-aiplatform + # huggingface-hub + # openai traitlets==5.14.3 # via # comm @@ -1317,87 +1301,120 @@ traitlets==5.14.3 # jupyter-client # jupyter-core # matplotlib-inline -typing-extensions==4.12.2 +typer==0.16.0 + # via + # -c constraints.txt + # fastapi-cli +types-protobuf==6.30.2.20250703 + # via apache-airflow-providers-google +types-requests==2.32.0.20250602 + # via + # -c constraints.txt + # retryhttp +typing-extensions==4.13.2 # via # -c constraints.txt + # a2wsgi + # aiosignal + # aiosqlite # alembic # anyio + # apache-airflow-core # asgiref - # astroid + # beautifulsoup4 + # cadwyn # cattrs - # flask-limiter + # exceptiongroup + # fastapi + # google-cloud-aiplatform + # google-genai + # huggingface-hub # ipython - # limits # looker-sdk # multidict + # openai # opentelemetry-sdk # pydantic # pydantic-core -tzdata==2024.1 + # pyopenssl + # referencing + # rich + # rich-toolkit + # structlog + # typer + # typing-inspection + # uvicorn +typing-inspection==0.4.1 + # via + # -c constraints.txt + # pydantic +tzdata==2025.2 # via # -c constraints.txt - # celery - # kombu # pandas # pendulum uc-micro-py==1.0.3 # via # -c constraints.txt # linkify-it-py -unicodecsv==0.14.1 - # via - # -c constraints.txt - # apache-airflow -universal-pathlib==0.2.5 +universal-pathlib==0.2.6 # via # -c constraints.txt - # apache-airflow -uritemplate==4.1.1 + # apache-airflow-core +uritemplate==4.2.0 # via google-api-python-client -urllib3==2.2.3 +urllib3==2.4.0 # via # -c constraints.txt # requests -vine==5.1.0 + # types-requests +uuid6==2024.7.10 + # via + # -c constraints.txt + # apache-airflow-core +uvicorn[standard]==0.34.3 + # via + # -c constraints.txt + # fastapi + # fastapi-cli +uvloop==0.21.0 # via # -c constraints.txt - # amqp - # celery - # kombu + # uvicorn +virtualenv==20.31.2 + # via ray voluptuous==0.15.2 # via env-canada +watchfiles==1.0.5 + # via + # -c constraints.txt + # uvicorn wcwidth==0.2.13 # via prompt-toolkit +websockets==14.2 + # via + # -c constraints.txt + # 
google-genai + # uvicorn werkzeug==2.2.3 # via # -c constraints.txt - # apache-airflow - # connexion # flask - # flask-appbuilder - # flask-jwt-extended - # flask-login -wheel==0.44.0 +wheel==0.45.1 # via pip-tools -wirerope==0.4.7 +wirerope==1.0.0 # via # -c constraints.txt # methodtools -wrapt==1.16.0 +wrapt==1.17.2 # via # -c constraints.txt - # astroid + # aiologic # deprecated -wtforms==3.1.2 - # via - # -c constraints.txt - # flask-appbuilder - # flask-wtf -yarl==1.11.1 - # via - # -c constraints.txt - # aiohttp -zipp==3.20.2 + # smart-open +yarl==1.20.1 + # via aiohttp +zipp==3.22.0 # via # -c constraints.txt # importlib-metadata diff --git a/volumes/miovision/api/intersection_tmc.py b/volumes/miovision/api/intersection_tmc.py index 5154f88e4..885676cbd 100644 --- a/volumes/miovision/api/intersection_tmc.py +++ b/volumes/miovision/api/intersection_tmc.py @@ -12,7 +12,7 @@ from time import sleep from collections import namedtuple -from airflow.hooks.base import BaseHook +from airflow.sdk.bases.hook import BaseHook from airflow.providers.postgres.hooks.postgres import PostgresHook class BreakingError(Exception): diff --git a/volumes/miovision/api/pull_alert.py b/volumes/miovision/api/pull_alert.py index 555b2e955..50191e352 100644 --- a/volumes/miovision/api/pull_alert.py +++ b/volumes/miovision/api/pull_alert.py @@ -11,7 +11,7 @@ from psycopg2 import sql from psycopg2.extras import execute_values -from airflow.hooks.base import BaseHook +from airflow.sdk.bases.hook import BaseHook from airflow.providers.postgres.hooks.postgres import PostgresHook def logger(): diff --git a/wys/api/python/wys_api.py b/wys/api/python/wys_api.py index 39a4997b6..a3025f765 100644 --- a/wys/api/python/wys_api.py +++ b/wys/api/python/wys_api.py @@ -15,7 +15,7 @@ from requests import Session, exceptions from requests.exceptions import RequestException -from airflow.hooks.base import BaseHook +from airflow.sdk.bases.hook import BaseHook from airflow.providers.postgres.hooks.postgres import PostgresHook SQL_DIR = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'sql')
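The last three hunks (intersection_tmc.py, pull_alert.py, wys_api.py) only relocate BaseHook: the helper scripts now import it from airflow.sdk.bases.hook instead of airflow.hooks.base, while PostgresHook keeps its provider path. A hedged sketch of how such a helper resolves credentials when it is invoked from a running task; the connection ids and query are placeholders, not the repo's:

from airflow.sdk.bases.hook import BaseHook
from airflow.providers.postgres.hooks.postgres import PostgresHook

def pull_something():
    # API credentials stored on an Airflow connection (id is illustrative).
    api_conn = BaseHook.get_connection("example_api_conn")
    api_key = api_conn.password

    # Database access still goes through the Postgres provider hook.
    db = PostgresHook("example_bot")
    with db.get_conn() as conn, conn.cursor() as cur:
        cur.execute("SELECT 1")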