Skip to content

Commit 168e744

Browse files
committed
Fix default storage IDs
1 parent 104a168 commit 168e744

File tree

8 files changed

+98
-36
lines changed

8 files changed

+98
-36
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
"apify-client>=1.9.2",
3838
"apify-shared>=1.3.0",
3939
"cachetools>=5.5.0",
40-
"crawlee@git+https://github.com/apify/crawlee-python.git@new-storage-clients",
40+
"crawlee@git+https://github.com/apify/crawlee-python.git@1cbf15e13af882c864b87f8ed48252bcb3747993",
4141
"cryptography>=42.0.0",
4242
"httpx>=0.27.0",
4343
# TODO: ensure compatibility with the latest version of lazy-object-proxy

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import os
45
from logging import getLogger
56
from typing import TYPE_CHECKING, Any
67

@@ -86,11 +87,35 @@ async def open(
8687

8788
apify_datasets_client = apify_client_async.datasets()
8889

89-
metadata = DatasetMetadata.model_validate(
90-
await apify_datasets_client.get_or_create(name=id if id is not None else name),
91-
)
92-
93-
apify_dataset_client = apify_client_async.dataset(dataset_id=metadata.id)
90+
if id and name:
91+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
92+
93+
# If name is provided, get or create the storage by name.
94+
if name is not None and id is None:
95+
id = DatasetMetadata.model_validate(
96+
await apify_datasets_client.get_or_create(name=name),
97+
).id
98+
99+
# If both id and name are None, try to get the default storage ID from environment variables.
100+
if id is None and name is None:
101+
id = os.environ.get(
102+
'ACTOR_DEFAULT_DATASET_ID',
103+
None,
104+
) or os.environ.get(
105+
'APIFY_DEFAULT_DATASET_ID',
106+
None,
107+
)
108+
109+
if id is None:
110+
raise ValueError(
111+
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
112+
)
113+
114+
# Get the client for the specific storage by ID.
115+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
116+
117+
# Fetch its metadata.
118+
metadata = DatasetMetadata.model_validate(await apify_dataset_client.get())
94119

95120
return cls(
96121
id=metadata.id,

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import os
45
from logging import getLogger
56
from typing import TYPE_CHECKING, Any
67

@@ -88,11 +89,35 @@ async def open(
8889

8990
apify_kvss_client = apify_client_async.key_value_stores()
9091

91-
metadata = KeyValueStoreMetadata.model_validate(
92-
await apify_kvss_client.get_or_create(name=id if id is not None else name),
93-
)
92+
if id and name:
93+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
94+
95+
# If name is provided, get or create the storage by name.
96+
if name is not None and id is None:
97+
id = KeyValueStoreMetadata.model_validate(
98+
await apify_kvss_client.get_or_create(name=name),
99+
).id
100+
101+
# If both id and name are None, try to get the default storage ID from environment variables.
102+
if id is None and name is None:
103+
id = os.environ.get(
104+
'ACTOR_DEFAULT_KEY_VALUE_STORE_ID',
105+
None,
106+
) or os.environ.get(
107+
'APIFY_DEFAULT_KEY_VALUE_STORE_ID',
108+
None,
109+
)
110+
111+
if id is None:
112+
raise ValueError(
113+
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
114+
)
115+
116+
# Get the client for the specific storage by ID.
117+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
94118

95-
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=metadata.id)
119+
# Fetch its metadata.
120+
metadata = KeyValueStoreMetadata.model_validate(await apify_kvs_client.get())
96121

97122
return cls(
98123
id=metadata.id,

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import os
45
from collections import deque
56
from datetime import datetime, timedelta, timezone
67
from logging import getLogger
@@ -118,12 +119,35 @@ async def open(
118119

119120
apify_rqs_client = apify_client_async.request_queues()
120121

121-
# Get or create the request queue
122-
metadata = RequestQueueMetadata.model_validate(
123-
await apify_rqs_client.get_or_create(name=id if id is not None else name),
124-
)
122+
if id and name:
123+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
124+
125+
# If name is provided, get or create the storage by name.
126+
if name is not None and id is None:
127+
id = RequestQueueMetadata.model_validate(
128+
await apify_rqs_client.get_or_create(name=name),
129+
).id
130+
131+
# If both id and name are None, try to get the default storage ID from environment variables.
132+
if id is None and name is None:
133+
id = os.environ.get(
134+
'ACTOR_DEFAULT_REQUEST_QUEUE_ID',
135+
None,
136+
) or os.environ.get(
137+
'APIFY_DEFAULT_REQUEST_QUEUE_ID',
138+
None,
139+
)
140+
141+
if id is None:
142+
raise ValueError(
143+
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
144+
)
145+
146+
# Get the client for the specific storage by ID.
147+
apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
125148

126-
apify_rq_client = apify_client_async.request_queue(request_queue_id=metadata.id)
149+
# Fetch its metadata.
150+
metadata = RequestQueueMetadata.model_validate(await apify_rq_client.get())
127151

128152
# Create the client instance
129153
return cls(

src/apify/storage_clients/_apify/_storage_client.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,7 @@ async def create_dataset_client(
2222
configuration: Configuration | None = None,
2323
) -> ApifyDatasetClient:
2424
configuration = configuration or Configuration.get_global_configuration()
25-
client = await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
26-
27-
if configuration.purge_on_start:
28-
await client.drop()
29-
client = await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
30-
31-
return client
25+
return await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
3226

3327
@override
3428
async def create_kvs_client(
@@ -39,13 +33,7 @@ async def create_kvs_client(
3933
configuration: Configuration | None = None,
4034
) -> ApifyKeyValueStoreClient:
4135
configuration = configuration or Configuration.get_global_configuration()
42-
client = await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
43-
44-
if configuration.purge_on_start:
45-
await client.drop()
46-
client = await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
47-
48-
return client
36+
return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
4937

5038
@override
5139
async def create_rq_client(
@@ -56,10 +44,4 @@ async def create_rq_client(
5644
configuration: Configuration | None = None,
5745
) -> ApifyRequestQueueClient:
5846
configuration = configuration or Configuration.get_global_configuration()
59-
client = await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)
60-
61-
if configuration.purge_on_start:
62-
await client.drop()
63-
client = await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)
64-
65-
return client
47+
return await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)

tests/integration/actor_source_base/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ FROM apify/actor-python:BASE_IMAGE_VERSION_PLACEHOLDER
33

44
COPY . ./
55

6+
RUN apt-get update && apt-get install -y \
7+
git \
8+
&& rm -rf /var/lib/apt/lists/*
9+
610
RUN echo "Python version:" \
711
&& python --version \
812
&& echo "Pip version:" \

tests/integration/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def _prepare_test_env() -> None:
5656
service_locator._configuration = None
5757
service_locator._event_manager = None
5858
service_locator._storage_client = None
59+
service_locator._storage_instance_manager = None
5960

6061
# Reset the retrieval flags.
6162
service_locator._configuration_was_retrieved = False

tests/unit/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def _prepare_test_env() -> None:
4545
service_locator._configuration = None
4646
service_locator._event_manager = None
4747
service_locator._storage_client = None
48+
service_locator._storage_instance_manager = None
4849

4950
# Reset the retrieval flags.
5051
service_locator._configuration_was_retrieved = False

0 commit comments

Comments
 (0)