Commit 97b367b

Fix windows tests after cleanup
1 parent 78660be

File tree

5 files changed: +50 -20 lines

src/crawlee/_consts.py

Lines changed: 1 addition & 0 deletions

@@ -1,3 +1,4 @@
 from __future__ import annotations
 
 METADATA_FILENAME = '__metadata__.json'
+"""The name of the metadata file for storage clients."""

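With the docstring added here, METADATA_FILENAME is documented at its single home in crawlee._consts; the diffs below drop the duplicate definition from crawlee._utils.file and repoint the storage clients' imports. A minimal sketch of resolving a metadata path with the relocated constant; the storage_dir value is illustrative only, not part of the commit:

    from pathlib import Path

    from crawlee._consts import METADATA_FILENAME

    # Illustrative storage directory; any directory managed by a storage client works the same way.
    storage_dir = Path('./storage/datasets/default')
    metadata_path = storage_dir / METADATA_FILENAME  # -> storage/datasets/default/__metadata__.json
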
src/crawlee/_utils/file.py

Lines changed: 43 additions & 17 deletions

@@ -4,22 +4,19 @@
 import csv
 import json
 import os
+import tempfile
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
-    from pathlib import Path
     from typing import Any, TextIO
 
     from typing_extensions import Unpack
 
     from crawlee._types import ExportDataCsvKwargs, ExportDataJsonKwargs
 
 
-METADATA_FILENAME = '__metadata__.json'
-"""The name of the metadata file for storage clients."""
-
-
 def infer_mime_type(value: Any) -> str:
     """Infer the MIME content type from the value.
 
@@ -58,26 +55,55 @@ async def json_dumps(obj: Any) -> str:
 
 
 async def atomic_write_text(path: Path, data: str) -> None:
-    tmp = path.with_suffix(path.suffix + '.tmp')
-    # write to .tmp
-    await asyncio.to_thread(tmp.write_text, data, encoding='utf-8')
+    dir_path = path.parent
+
+    def _sync_write_text() -> str:
+        # Create a temp file in the target directory and return its name.
+        fd, tmp_path = tempfile.mkstemp(
+            suffix=path.suffix,
+            prefix=f'{path.name}.',
+            dir=str(dir_path),
+        )
+        try:
+            with os.fdopen(fd, 'w', encoding='utf-8') as tmp_file:
+                tmp_file.write(data)
+        except:
+            Path(tmp_path).unlink(missing_ok=True)
+            raise
+        return tmp_path
+
+    tmp_path = await asyncio.to_thread(_sync_write_text)
 
     try:
-        await asyncio.to_thread(os.replace, tmp, path)
-    except FileNotFoundError:
-        # If the .tmp vanished, fall back to a straight write
+        await asyncio.to_thread(os.replace, tmp_path, str(path))
+    except (FileNotFoundError, PermissionError):
+        # Fall back to a plain write if the temp file vanished or the replace was blocked.
         await asyncio.to_thread(path.write_text, data, encoding='utf-8')
 
 
 async def atomic_write_bytes(path: Path, data: bytes) -> None:
-    tmp = path.with_suffix(path.suffix + '.tmp')
-    # write to .tmp
-    await asyncio.to_thread(tmp.write_bytes, data)
+    dir_path = path.parent
+
+    def _sync_write_bytes() -> str:
+        fd, tmp_path = tempfile.mkstemp(
+            suffix=path.suffix,
+            prefix=f'{path.name}.',
+            dir=str(dir_path),
+        )
+        try:
+            with os.fdopen(fd, 'wb') as tmp_file:
+                tmp_file.write(data)
+        except:
+            Path(tmp_path).unlink(missing_ok=True)
+            raise
+        return tmp_path
+
+    tmp_path = await asyncio.to_thread(_sync_write_bytes)
 
     try:
-        await asyncio.to_thread(os.replace, tmp, path)
-    except FileNotFoundError:
-        # If the .tmp vanished, fall back to a straight write
+        await asyncio.to_thread(os.replace, tmp_path, str(path))
+    except (FileNotFoundError, PermissionError):
+        # Fall back to a plain write if the temp file vanished or the replace was blocked.
        await asyncio.to_thread(path.write_bytes, data)
 
 

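The rewrite drops the fixed `<name>.tmp` sibling path in favour of tempfile.mkstemp(), which creates a uniquely named temp file in the destination directory, so concurrent writers no longer collide on a single temp path and os.replace() still swaps files within one filesystem. It also adds PermissionError to the fallback branch: on Windows, os.replace() raises PermissionError when the destination file is still held open by another handle. A standalone sketch of the same pattern, with a hypothetical atomic_write helper that is not part of crawlee:

    import os
    import tempfile
    from pathlib import Path


    def atomic_write(path: Path, data: str) -> None:
        # Write to a uniquely named temp file in the destination directory.
        fd, tmp_path = tempfile.mkstemp(suffix=path.suffix, prefix=f'{path.name}.', dir=str(path.parent))
        try:
            with os.fdopen(fd, 'w', encoding='utf-8') as tmp_file:
                tmp_file.write(data)
        except BaseException:
            Path(tmp_path).unlink(missing_ok=True)  # clean up the temp file on any failure
            raise
        try:
            # os.replace() is atomic on a single filesystem; on Windows it raises
            # PermissionError if the destination is open elsewhere, hence the fallback.
            os.replace(tmp_path, str(path))
        except (FileNotFoundError, PermissionError):
            path.write_text(data, encoding='utf-8')


    atomic_write(Path('example.json'), '{"status": "ok"}')
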
src/crawlee/storage_clients/_file_system/_dataset_client.py

Lines changed: 2 additions & 1 deletion

@@ -11,8 +11,9 @@
 from pydantic import ValidationError
 from typing_extensions import override
 
+from crawlee._consts import METADATA_FILENAME
 from crawlee._utils.crypto import crypto_random_object_id
-from crawlee._utils.file import METADATA_FILENAME, atomic_write_text, json_dumps
+from crawlee._utils.file import atomic_write_text, json_dumps
 from crawlee.storage_clients._base import DatasetClient
 from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
 

src/crawlee/storage_clients/_file_system/_key_value_store_client.py

Lines changed: 2 additions & 1 deletion

@@ -12,8 +12,9 @@
 from pydantic import ValidationError
 from typing_extensions import override
 
+from crawlee._consts import METADATA_FILENAME
 from crawlee._utils.crypto import crypto_random_object_id
-from crawlee._utils.file import METADATA_FILENAME, atomic_write_bytes, atomic_write_text, infer_mime_type, json_dumps
+from crawlee._utils.file import atomic_write_bytes, atomic_write_text, infer_mime_type, json_dumps
 from crawlee.storage_clients._base import KeyValueStoreClient
 from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
 

src/crawlee/storage_clients/_file_system/_request_queue_client.py

Lines changed: 2 additions & 1 deletion

@@ -12,8 +12,9 @@
 from typing_extensions import override
 
 from crawlee import Request
+from crawlee._consts import METADATA_FILENAME
 from crawlee._utils.crypto import crypto_random_object_id
-from crawlee._utils.file import METADATA_FILENAME, atomic_write_text, json_dumps
+from crawlee._utils.file import atomic_write_text, json_dumps
 from crawlee.storage_clients._base import RequestQueueClient
 from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
 

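The three storage clients only change where METADATA_FILENAME is imported from; their use of the file helpers is unchanged. A hedged sketch of how a file-system storage client might combine these helpers to persist metadata; the persist_metadata function, directory path, and metadata dict are illustrative rather than crawlee's actual client code, while the helper signatures (async json_dumps(obj) -> str, async atomic_write_text(path, data)) come from the file.py diff above:

    import asyncio
    from pathlib import Path

    from crawlee._consts import METADATA_FILENAME
    from crawlee._utils.file import atomic_write_text, json_dumps


    async def persist_metadata(storage_path: Path, metadata: dict) -> None:
        # Serialize the metadata and write it atomically next to the stored records.
        storage_path.mkdir(parents=True, exist_ok=True)
        data = await json_dumps(metadata)
        await atomic_write_text(storage_path / METADATA_FILENAME, data)


    asyncio.run(persist_metadata(Path('./storage/datasets/default'), {'id': 'default', 'item_count': 0}))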