Skip to content

Commit 1f433c8

Browse files
committed
memory and file system
1 parent b20b324 commit 1f433c8

22 files changed

+525
-1681
lines changed

src/crawlee/_service_locator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ def set_event_manager(self, event_manager: EventManager) -> None:
7777
def get_storage_client(self) -> StorageClient:
7878
"""Get the storage client."""
7979
if self._storage_client is None:
80-
from crawlee.storage_clients import MemoryStorageClient
80+
from crawlee.storage_clients import file_system_storage_client
8181

82-
self._storage_client = MemoryStorageClient()
82+
self._storage_client = file_system_storage_client
8383

8484
self._storage_client_was_retrieved = True
8585
return self._storage_client

src/crawlee/_types.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,10 +274,6 @@ async def push_data(
274274
**kwargs: Unpack[PushDataKwargs],
275275
) -> None:
276276
"""Track a call to the `push_data` context helper."""
277-
from crawlee.storages._dataset import Dataset
278-
279-
await Dataset._check_and_serialize(data)
280-
281277
self.push_data_calls.append(
282278
PushDataFunctionCall(
283279
data=data,
src/crawlee/storage_clients/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
from ._base import StorageClient
2-
from ._memory import MemoryStorageClient
2+
from ._file_system import file_system_storage_client
3+
from ._memory import memory_storage_client
34

4-
__all__ = ['MemoryStorageClient', 'StorageClient']
5+
__all__ = [
6+
'StorageClient',
7+
'file_system_storage_client',
8+
'memory_storage_client'
9+
]

src/crawlee/storage_clients/_base/__init__.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,10 @@
22
from ._key_value_store_client import KeyValueStoreClient
33
from ._request_queue_client import RequestQueueClient
44
from ._storage_client import StorageClient
5-
from ._types import ResourceClient
65

76
__all__ = [
87
'DatasetClient',
9-
'DatasetCollectionClient',
108
'KeyValueStoreClient',
11-
'KeyValueStoreCollectionClient',
129
'RequestQueueClient',
13-
'RequestQueueCollectionClient',
14-
'ResourceClient',
1510
'StorageClient',
1611
]

src/crawlee/storage_clients/_base/_dataset_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ async def open(
7676

7777
@abstractmethod
7878
async def drop(self) -> None:
79-
"""Drop the dataset and all its data.
79+
"""Drop the whole dataset and remove all its items.
8080
8181
The backend method for the `Dataset.drop` call.
8282
"""
src/crawlee/storage_clients/_base/_storage_client.py

Lines changed: 6 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,16 @@
11
from __future__ import annotations
22

3-
from abc import ABC, abstractmethod
3+
from dataclasses import dataclass
44
from typing import TYPE_CHECKING
55

6-
from crawlee._utils.docs import docs_group
7-
86
if TYPE_CHECKING:
97
from ._dataset_client import DatasetClient
108
from ._key_value_store_client import KeyValueStoreClient
119
from ._request_queue_client import RequestQueueClient
1210

1311

14-
@docs_group('Abstract classes')
15-
class StorageClient(ABC):
16-
"""Defines an abstract base for storage clients.
17-
18-
It offers interfaces to get subclients for interacting with storage resources like datasets, key-value stores,
19-
and request queues.
20-
"""
21-
22-
@abstractmethod
23-
def dataset(self) -> type[DatasetClient]:
24-
"""Get a dataset client class."""
25-
26-
@abstractmethod
27-
def key_value_store(self) -> type[KeyValueStoreClient]:
28-
"""Get a key-value store client class."""
29-
30-
@abstractmethod
31-
def request_queue(self) -> type[RequestQueueClient]:
32-
"""Get a request queue client class."""
33-
34-
def get_rate_limit_errors(self) -> dict[int, int]:
35-
"""Return statistics about rate limit errors encountered by the HTTP client in storage client."""
36-
return {}
37-
38-
# @abstractmethod
39-
# async def purge_on_start(self) -> None:
40-
# """Performs a purge of the default storages.
41-
42-
# This method ensures that the purge is executed only once during the lifetime of the instance.
43-
# It is primarily used to clean up residual data from previous runs to maintain a clean state.
44-
# If the storage client does not support purging, leave it empty.
45-
# """
12+
@dataclass
13+
class StorageClient:
14+
dataset_client_class: type[DatasetClient]
15+
key_value_store_client_class: type[KeyValueStoreClient]
16+
request_queue_client_class: type[RequestQueueClient]

src/crawlee/storage_clients/_base/_types.py

Lines changed: 0 additions & 13 deletions
This file was deleted.
src/crawlee/storage_clients/_file_system/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from ._storage_client import file_system_storage_client
2+
3+
__all__ = ['file_system_storage_client']

0 commit comments

Comments
 (0)