Skip to content

Commit 91dc3d9

Browse files
authored
avoids passing naming conventions as modules (#3229)
* adds /home/rudolfix/src/dlt to sys.path when running dlt commands and a cli flag to disable it * adds cli docs check to lint * avoids passing custom naming as modules in docs * removes cli docs check due to Python 3.9 * fixes deploy cli * adds pokemon table count consts * improves custom naming convention docs
1 parent 0dcdcf0 commit 91dc3d9

File tree

8 files changed

+63
-25
lines changed

8 files changed

+63
-25
lines changed

Makefile

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,6 @@ test-load-local-postgres:
125125
test-common:
126126
uv run pytest tests/common tests/normalize tests/extract tests/pipeline tests/reflection tests/sources tests/workspace tests/load/test_dummy_client.py tests/libs tests/destinations
127127

128-
reset-test-storage:
129-
-rm -r _storage
130-
mkdir _storage
131-
python3 tests/tools/create_storages.py
132-
133128
build-library: dev
134129
uv version
135130
uv build

dlt/_workspace/cli/_deploy_command.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def __init__(
8080
run_on_push: bool = False,
8181
run_manually: bool = False,
8282
branch: Optional[str] = None,
83+
**kwargs: Any,
8384
):
8485
super().__init__(pipeline_script_path, location, branch)
8586
self.schedule = schedule
@@ -264,6 +265,7 @@ def __init__(
264265
location: str,
265266
branch: Optional[str] = None,
266267
secrets_format: Optional[str] = None,
268+
**kwargs: Any,
267269
):
268270
super().__init__(pipeline_script_path, location, branch)
269271
self.secrets_format = secrets_format

dlt/_workspace/cli/_dlt.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import sys
12
from typing import Any, Sequence, Type, cast, List, Dict, Tuple
23
import argparse
3-
import click
44
import rich_argparse
55
from rich.markdown import Markdown
66

@@ -134,6 +134,15 @@ def _create_parser() -> Tuple[argparse.ArgumentParser, Dict[str, SupportsCliComm
134134
" clear enough."
135135
),
136136
)
137+
parser.add_argument(
138+
"--no-pwd",
139+
default=False,
140+
action="store_true",
141+
help=(
142+
"Do not add current working directory to sys.path. By default $pwd is added to "
143+
"reproduce Python behavior when running scripts."
144+
),
145+
)
137146
subparsers = parser.add_subparsers(title="Available subcommands", dest="command")
138147

139148
# load plugins
@@ -190,6 +199,9 @@ def main() -> int:
190199
# switch to non-interactive if tty not connected
191200
with maybe_no_stdin():
192201
display_run_context_info()
202+
if not args.no_pwd:
203+
if "" not in sys.path:
204+
sys.path.insert(0, "")
193205
cmd.execute(args)
194206
except Exception as ex:
195207
docs_url = cmd.docs_url if hasattr(cmd, "docs_url") else DEFAULT_DOCS_URL
@@ -204,7 +216,7 @@ def main() -> int:
204216

205217
# print exception if available
206218
if raiseable_exception:
207-
click.secho(str(ex), err=True, fg="red")
219+
fmt.secho(str(ex), err=True, fg="red")
208220

209221
fmt.note("Please refer to our docs at '%s' for further assistance." % docs_url)
210222
if _debug.is_debug_enabled() and raiseable_exception:

dlt/_workspace/cli/utils.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,8 @@ def display_run_context_info() -> None:
3737
if run_context.default_profile != run_context.profile:
3838
# print warning
3939
fmt.echo(
40-
"Profile %s activated on %s"
41-
% (
42-
fmt.style(run_context.profile, fg="yellow", reset=True),
43-
fmt.bold(run_context.name),
44-
),
40+
"Profile `%s` is active."
41+
% (fmt.style(run_context.profile, fg="yellow", reset=True),),
4542
err=True,
4643
)
4744

docs/examples/custom_naming/custom_naming.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
1919
With this example you will learn to:
2020
* Create a naming convention module with a recommended layout
21-
* Use naming convention by explicitly passing it to `duckdb` destination factory
21+
* Use naming convention by explicitly passing module name to `postgres` destination factory
2222
* Use naming convention by configuring it config.toml
2323
* Changing the declared case sensitivity by overriding `is_case_sensitive` property
2424
* Providing custom normalization logic by overriding `normalize_identifier` method
@@ -28,12 +28,11 @@
2828
import dlt
2929

3030
if __name__ == "__main__":
31-
# sql_cs_latin2 module
32-
import sql_cs_latin2 # type: ignore[import-not-found]
33-
34-
# create postgres destination with a custom naming convention. pass sql_cs_latin2 as module
3531
# NOTE: sql_cs_latin2 is case sensitive and postgres accepts UNICODE letters in identifiers
36-
dest_ = dlt.destinations.postgres(naming_convention=sql_cs_latin2)
32+
# create postgres destination with a custom naming convention. sql_cs_latin2 is an importable
33+
# module
34+
# import sql_cs_latin2 # is resolving in this context
35+
dest_ = dlt.destinations.postgres(naming_convention="sql_cs_latin2")
3736

3837
# run a pipeline
3938
pipeline = dlt.pipeline(

docs/website/docs/general-usage/naming-convention.md

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,16 @@ password="pass"
134134
```
135135
The snippet above demonstrates how to apply a certain naming for an example `zendesk` source.
136136

137-
You can use naming conventions that you created yourself or got from other users. In that case, you should pass a full Python import path to the [module that contains the naming convention](#write-your-own-naming-convention):
138-
```toml
139-
[schema]
140-
naming="tests.common.cases.normalizers.sql_upper"
137+
You can set the naming convention in your code via the destination factory. This will override the destination's preferred convention and make it
138
the default for the whole pipeline:
139+
140+
```py
141+
import dlt
142+
143+
dest_ = dlt.destinations.postgres(naming_convention="sql_cs_v1")
141144
```
142-
`dlt` will import `tests.common.cases.normalizers.sql_upper` and use the `NamingConvention` class found in it as the naming convention.
143145

146+
You can use naming conventions that you created yourself or got from other users. In that case, you should pass a full Python import path to the [module that contains the naming convention](#write-your-own-naming-convention):
144147

145148
### Available naming conventions
146149
You can pick from a few built-in naming conventions.
@@ -191,7 +194,32 @@ Custom naming conventions are classes that derive from `NamingConvention`, which
191194
1. Each naming convention resides in a separate Python module (file).
192195
2. The class is always named `NamingConvention`.
193196

194-
In that case, you can use a fully qualified module name in [schema configuration](#configure-naming-convention) or pass the module [explicitly](#configure-naming-convention).
197+
In that case, you can use a fully qualified module name in [schema configuration](#configure-naming-convention) or pass the module's fully qualified name [explicitly](#configure-naming-convention).
198+
199+
```toml
200+
[schema]
201+
naming="tests.common.cases.normalizers.sql_upper"
202+
```
203+
`dlt` will import `tests.common.cases.normalizers.sql_upper` and use the `NamingConvention` class found in it as the naming convention.
204+
205+
:::tip
206+
Do not pass custom naming conventions as module objects when setting them explicitly. We recommend the pattern below:
207+
```py
208+
import dlt
209+
210+
dest_ = dlt.destinations.postgres(naming_convention="my_package.sql_cs_latin2")
211+
```
212+
213+
⛔ Avoid this or you may get pickle errors, e.g., when using parallel normalization:
214+
```py
215+
import dlt
216+
217+
import my_package.sql_cs_latin2 # type: ignore[import-not-found]
218+
219+
dest_ = dlt.destinations.postgres(naming_convention=my_package.sql_cs_latin2)
220+
```
221+
:::
222+
195223

196224
We include [two examples](../examples/custom_naming) of naming conventions that you may find useful:
197225

docs/website/docs/reference/command-line-interface.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Creates, adds, inspects and deploys dlt pipelines. Further help is available at
3030
**Usage**
3131
```sh
3232
dlt [-h] [--version] [--disable-telemetry] [--enable-telemetry]
33-
[--non-interactive] [--debug]
33+
[--non-interactive] [--debug] [--no-pwd]
3434
{telemetry,schema,pipeline,init,render-docs,deploy,dashboard,ai} ...
3535
```
3636

@@ -45,6 +45,7 @@ dlt [-h] [--version] [--disable-telemetry] [--enable-telemetry]
4545
* `--enable-telemetry` - Enables telemetry before command is executed
4646
* `--non-interactive` - Non interactive mode. default choices are automatically made for confirmations and prompts.
4747
* `--debug` - Displays full stack traces on exceptions. useful for debugging if the output is not clear enough.
48+
* `--no-pwd` - Do not add current working directory to sys.path. by default $pwd is added to reproduce python behavior when running scripts.
4849

4950
**Available subcommands**
5051
* [`telemetry`](#dlt-telemetry) - Shows telemetry status

tests/workspace/cli/common/test_cli_invoke.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None:
148148
assert _deploy_command.call_args[1] == {
149149
"pipeline_script_path": "debug_pipeline.py",
150150
"deployment_method": "github-action",
151+
"no_pwd": False,
151152
"repo_location": "https://github.com/dlt-hub/dlt-deploy-template.git",
152153
"branch": None,
153154
"command": "deploy",
@@ -176,6 +177,7 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None:
176177
assert _deploy_command.call_args[1] == {
177178
"pipeline_script_path": "debug_pipeline.py",
178179
"deployment_method": "github-action",
180+
"no_pwd": False,
179181
"repo_location": "folder",
180182
"branch": "branch",
181183
"command": "deploy",
@@ -197,6 +199,7 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None:
197199
assert _deploy_command.call_args[1] == {
198200
"pipeline_script_path": "debug_pipeline.py",
199201
"deployment_method": "airflow-composer",
202+
"no_pwd": False,
200203
"repo_location": "https://github.com/dlt-hub/dlt-deploy-template.git",
201204
"branch": None,
202205
"command": "deploy",
@@ -212,6 +215,7 @@ def test_invoke_deploy_mock(script_runner: ScriptRunner) -> None:
212215
assert _deploy_command.call_args[1] == {
213216
"pipeline_script_path": "debug_pipeline.py",
214217
"deployment_method": "airflow-composer",
218+
"no_pwd": False,
215219
"repo_location": "https://github.com/dlt-hub/dlt-deploy-template.git",
216220
"branch": None,
217221
"command": "deploy",

0 commit comments

Comments
 (0)