Skip to content

Commit ca72313

Browse files
committed
Apify storage client fixes and new docs groups
1 parent 44d8e09 commit ca72313

16 files changed

+143
-78
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
"apify-client>=1.12.0",
3838
"apify-shared>=1.3.0",
3939
"cachetools>=5.5.0",
40-
"crawlee@git+https://github.com/apify/crawlee-python.git@0c4cfc9ada06e35f63213e6a937c4e85defcbecf",
40+
"crawlee@git+https://github.com/apify/crawlee-python.git@master",
4141
"cryptography>=42.0.0",
4242
"httpx>=0.27.0",
4343
# TODO: ensure compatibility with the latest version of lazy-object-proxy

src/apify/_actor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454

5555

5656
@docs_name('Actor')
57-
@docs_group('Classes')
57+
@docs_group('Actor')
5858
class _ActorType:
5959
"""The class of `Actor`. Only make a new instance if you're absolutely sure you need to."""
6060

src/apify/_charging.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
run_validator = TypeAdapter[ActorRun | None](ActorRun | None)
2727

2828

29-
@docs_group('Interfaces')
29+
@docs_group('Charging')
3030
class ChargingManager(Protocol):
3131
"""Provides fine-grained access to pay-per-event functionality."""
3232

@@ -57,7 +57,7 @@ def get_pricing_info(self) -> ActorPricingInfo:
5757
"""
5858

5959

60-
@docs_group('Data structures')
60+
@docs_group('Charging')
6161
@dataclass(frozen=True)
6262
class ChargeResult:
6363
"""Result of the `ChargingManager.charge` method."""
@@ -72,7 +72,7 @@ class ChargeResult:
7272
"""How many events of each known type can still be charged within the limit."""
7373

7474

75-
@docs_group('Data structures')
75+
@docs_group('Charging')
7676
@dataclass
7777
class ActorPricingInfo:
7878
"""Result of the `ChargingManager.get_pricing_info` method."""

src/apify/_configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _transform_to_list(value: Any) -> list[str] | None:
2525
return value if isinstance(value, list) else str(value).split(',')
2626

2727

28-
@docs_group('Classes')
28+
@docs_group('Configuration')
2929
class Configuration(CrawleeConfiguration):
3030
"""A class for specifying the configuration of an Actor.
3131

src/apify/_models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from typing import TypeAlias
1717

1818

19-
@docs_group('Data structures')
19+
@docs_group('Other')
2020
class Webhook(BaseModel):
2121
__model_config__ = ConfigDict(populate_by_name=True)
2222

@@ -35,14 +35,14 @@ class Webhook(BaseModel):
3535
] = None
3636

3737

38-
@docs_group('Data structures')
38+
@docs_group('Actor')
3939
class ActorRunMeta(BaseModel):
4040
__model_config__ = ConfigDict(populate_by_name=True)
4141

4242
origin: Annotated[MetaOrigin, Field()]
4343

4444

45-
@docs_group('Data structures')
45+
@docs_group('Actor')
4646
class ActorRunStats(BaseModel):
4747
__model_config__ = ConfigDict(populate_by_name=True)
4848

@@ -63,7 +63,7 @@ class ActorRunStats(BaseModel):
6363
compute_units: Annotated[float, Field(alias='computeUnits')]
6464

6565

66-
@docs_group('Data structures')
66+
@docs_group('Actor')
6767
class ActorRunOptions(BaseModel):
6868
__model_config__ = ConfigDict(populate_by_name=True)
6969

@@ -74,7 +74,7 @@ class ActorRunOptions(BaseModel):
7474
max_total_charge_usd: Annotated[Decimal | None, Field(alias='maxTotalChargeUsd')] = None
7575

7676

77-
@docs_group('Data structures')
77+
@docs_group('Actor')
7878
class ActorRunUsage(BaseModel):
7979
__model_config__ = ConfigDict(populate_by_name=True)
8080

@@ -92,7 +92,7 @@ class ActorRunUsage(BaseModel):
9292
proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
9393

9494

95-
@docs_group('Data structures')
95+
@docs_group('Actor')
9696
class ActorRun(BaseModel):
9797
__model_config__ = ConfigDict(populate_by_name=True)
9898

src/apify/_platform_event_manager.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131
__all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
3232

3333

34-
@docs_group('Data structures')
34+
@docs_group('Event data')
3535
class PersistStateEvent(BaseModel):
3636
name: Literal[Event.PERSIST_STATE]
3737
data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
3838

3939

40-
@docs_group('Data structures')
40+
@docs_group('Event data')
4141
class SystemInfoEventData(BaseModel):
4242
mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
4343
mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
@@ -64,31 +64,31 @@ def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
6464
)
6565

6666

67-
@docs_group('Data structures')
67+
@docs_group('Event data')
6868
class SystemInfoEvent(BaseModel):
6969
name: Literal[Event.SYSTEM_INFO]
7070
data: SystemInfoEventData
7171

7272

73-
@docs_group('Data structures')
73+
@docs_group('Event data')
7474
class MigratingEvent(BaseModel):
7575
name: Literal[Event.MIGRATING]
7676
data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
7777

7878

79-
@docs_group('Data structures')
79+
@docs_group('Event data')
8080
class AbortingEvent(BaseModel):
8181
name: Literal[Event.ABORTING]
8282
data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
8383

8484

85-
@docs_group('Data structures')
85+
@docs_group('Event data')
8686
class ExitEvent(BaseModel):
8787
name: Literal[Event.EXIT]
8888
data: Annotated[EventExitData, Field(default_factory=EventExitData)]
8989

9090

91-
@docs_group('Data structures')
91+
@docs_group('Event data')
9292
class EventWithoutData(BaseModel):
9393
name: Literal[
9494
Event.SESSION_RETIRED,
@@ -101,13 +101,13 @@ class EventWithoutData(BaseModel):
101101
data: Any = None
102102

103103

104-
@docs_group('Data structures')
104+
@docs_group('Event data')
105105
class DeprecatedEvent(BaseModel):
106106
name: Literal['cpuInfo']
107107
data: Annotated[dict[str, Any], Field(default_factory=dict)]
108108

109109

110-
@docs_group('Data structures')
110+
@docs_group('Event data')
111111
class UnknownEvent(BaseModel):
112112
name: str
113113
data: Annotated[dict[str, Any], Field(default_factory=dict)]
@@ -120,7 +120,7 @@ class UnknownEvent(BaseModel):
120120
)
121121

122122

123-
@docs_group('Classes')
123+
@docs_group('Event managers')
124124
class PlatformEventManager(EventManager):
125125
"""A class for managing Actor events.
126126

src/apify/_proxy_configuration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def _check(
7070
raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
7171

7272

73-
@docs_group('Classes')
73+
@docs_group('Configuration')
7474
@dataclass
7575
class ProxyInfo(CrawleeProxyInfo):
7676
"""Provides information about a proxy connection that is used for requests."""
@@ -90,7 +90,7 @@ class ProxyInfo(CrawleeProxyInfo):
9090
"""
9191

9292

93-
@docs_group('Classes')
93+
@docs_group('Configuration')
9494
class ProxyConfiguration(CrawleeProxyConfiguration):
9595
"""Configures a connection to a proxy server with the provided options.
9696

src/apify/_utils.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,18 @@ def is_running_in_ipython() -> bool:
3030
return getattr(builtins, '__IPYTHON__', False)
3131

3232

33-
GroupName = Literal['Classes', 'Abstract classes', 'Interfaces', 'Data structures', 'Errors', 'Functions']
33+
# The order of the rendered API groups is defined in the docusaurus-plugin-typedoc-api.
34+
GroupName = Literal[
35+
'Actor',
36+
'Charging',
37+
'Configuration',
38+
'Event managers',
39+
'Event data',
40+
'Storage clients',
41+
'Storage data',
42+
'Storages',
43+
'Other',
44+
]
3445

3546

3647
def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,6 @@ async def open(
105105
f'(api_public_base_url={api_public_base_url}).'
106106
)
107107

108-
if id and name:
109-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
110-
111108
# Create Apify client with the provided token and API URL.
112109
apify_client_async = ApifyClientAsync(
113110
token=token,
@@ -118,23 +115,40 @@ async def open(
118115
)
119116
apify_datasets_client = apify_client_async.datasets()
120117

118+
# If both id and name are provided, raise an error.
119+
if id and name:
120+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
121+
122+
# If id is provided, get the storage by ID.
123+
if id and name is None:
124+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
125+
121126
# If name is provided, get or create the storage by name.
122-
if name is not None and id is None:
127+
if name and id is None:
123128
id = DatasetMetadata.model_validate(
124129
await apify_datasets_client.get_or_create(name=name),
125130
).id
131+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
126132

127133
# If both id and name are None, try to get the default storage ID from environment variables.
128134
if id is None and name is None:
129-
id = getattr(configuration, 'default_dataset_id', None)
135+
id = configuration.default_dataset_id
136+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
130137

131-
if id is None:
132-
raise ValueError(
133-
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
134-
)
138+
# Fetch its metadata.
139+
metadata = await apify_dataset_client.get()
140+
141+
# If metadata is None, it means the storage does not exist, so we create it.
142+
if metadata is None:
143+
id = DatasetMetadata.model_validate(
144+
await apify_datasets_client.get_or_create(),
145+
).id
146+
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
135147

136-
# Get the client for the specific storage by ID.
137-
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
148+
# Verify that the storage exists by fetching its metadata again.
149+
metadata = await apify_dataset_client.get()
150+
if metadata is None:
151+
raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
138152

139153
return cls(
140154
api_client=apify_dataset_client,

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,6 @@ async def open(
9797
f'(api_public_base_url={api_public_base_url}).'
9898
)
9999

100-
if id and name:
101-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
102-
103100
# Create Apify client with the provided token and API URL.
104101
apify_client_async = ApifyClientAsync(
105102
token=token,
@@ -110,23 +107,40 @@ async def open(
110107
)
111108
apify_kvss_client = apify_client_async.key_value_stores()
112109

110+
# If both id and name are provided, raise an error.
111+
if id and name:
112+
raise ValueError('Only one of "id" or "name" can be specified, not both.')
113+
114+
# If id is provided, get the storage by ID.
115+
if id and name is None:
116+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
117+
113118
# If name is provided, get or create the storage by name.
114-
if name is not None and id is None:
119+
if name and id is None:
115120
id = ApifyKeyValueStoreMetadata.model_validate(
116121
await apify_kvss_client.get_or_create(name=name),
117122
).id
123+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
118124

119125
# If both id and name are None, try to get the default storage ID from environment variables.
120126
if id is None and name is None:
121-
id = getattr(configuration, 'default_key_value_store_id', None)
127+
id = configuration.default_key_value_store_id
128+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
122129

123-
if id is None:
124-
raise ValueError(
125-
'Either "id" or "name" must be provided, or the storage ID must be set in environment variable.'
126-
)
130+
# Fetch its metadata.
131+
metadata = await apify_kvs_client.get()
132+
133+
# If metadata is None, it means the storage does not exist, so we create it.
134+
if metadata is None:
135+
id = ApifyKeyValueStoreMetadata.model_validate(
136+
await apify_kvss_client.get_or_create(),
137+
).id
138+
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
127139

128-
# Get the client for the specific storage by ID.
129-
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
140+
# Verify that the storage exists by fetching its metadata again.
141+
metadata = await apify_kvs_client.get()
142+
if metadata is None:
143+
raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
130144

131145
return cls(
132146
api_client=apify_kvs_client,

0 commit comments

Comments
 (0)