|
22 | 22 | }, |
23 | 23 | { |
24 | 24 | "cell_type": "code", |
25 | | - "execution_count": 68, |
| 25 | + "execution_count": 1, |
26 | 26 | "id": "37025933", |
27 | 27 | "metadata": {}, |
28 | 28 | "outputs": [ |
29 | 29 | { |
30 | 30 | "name": "stdout", |
31 | 31 | "output_type": "stream", |
32 | 32 | "text": [ |
33 | | - "150.2 kB\n" |
| 33 | + "74.0 kB\n" |
34 | 34 | ] |
35 | 35 | } |
36 | 36 | ], |
|
89 | 89 | }, |
90 | 90 | { |
91 | 91 | "cell_type": "code", |
92 | | - "execution_count": 69, |
| 92 | + "execution_count": 2, |
93 | 93 | "id": "164ecaee", |
94 | 94 | "metadata": {}, |
95 | 95 | "outputs": [ |
|
134 | 134 | "print(json.dumps(item_collection[\"features\"][0], indent=2))" |
135 | 135 | ] |
136 | 136 | }, |
| 137 | + { |
| 138 | + "cell_type": "markdown", |
| 139 | + "id": "9325e2af", |
| 140 | + "metadata": {}, |
| 141 | + "source": [ |
| 142 | + "### Writing in chunks\n", |
| 143 | + "\n", |
| 144 | + "If you have a lot of items, you might not want to load them all into memory at once.\n", |
| 145 | + "We provide a context manager for iteratively writing **stac-geoparquet**.\n", |
| 146 | + "This example is a bit contrived: the first batch is passed in when the writer is opened, and each remaining batch is added with `writer.write`." |
| 147 | + ] |
| 148 | + }, |
| 149 | + { |
| 150 | + "cell_type": "code", |
| 151 | + "execution_count": 3, |
| 152 | + "id": "9045b4b4", |
| 153 | + "metadata": {}, |
| 154 | + "outputs": [ |
| 155 | + { |
| 156 | + "name": "stdout", |
| 157 | + "output_type": "stream", |
| 158 | + "text": [ |
| 159 | + "Writing batch of 499 items\n", |
| 160 | + "Writing batch of 499 items\n", |
| 161 | + "Writing batch of 499 items\n", |
| 162 | + "Writing batch of 499 items\n", |
| 163 | + "Writing batch of 499 items\n", |
| 164 | + "Writing batch of 499 items\n", |
| 165 | + "Writing batch of 499 items\n", |
| 166 | + "Writing batch of 499 items\n", |
| 167 | + "Writing batch of 499 items\n", |
| 168 | + "Writing batch of 499 items\n", |
| 169 | + "Writing batch of 499 items\n", |
| 170 | + "Writing batch of 499 items\n", |
| 171 | + "Writing batch of 499 items\n", |
| 172 | + "Writing batch of 499 items\n", |
| 173 | + "Writing batch of 499 items\n", |
| 174 | + "Writing batch of 499 items\n", |
| 175 | + "Writing batch of 499 items\n", |
| 176 | + "Writing batch of 499 items\n", |
| 177 | + "Writing batch of 499 items\n", |
| 178 | + "Writing batch of 20 items\n", |
| 179 | + "Read back 10000 items\n" |
| 180 | + ] |
| 181 | + } |
| 182 | + ], |
| 183 | + "source": [ |
| 184 | + "import itertools\n", |
| 185 | + "\n", |
| 186 | + "iterator = itertools.batched(items, 499)\n", |
| 187 | + "\n", |
| 188 | + "with rustac.geoparquet_writer(list(next(iterator)), \"items-batched.parquet\") as writer:\n", |
| 189 | + " for item_batch in iterator:\n", |
| 190 | + " print(f\"Writing batch of {len(item_batch)} items\")\n", |
| 191 | + " writer.write(list(item_batch))\n", |
| 192 | + "\n", |
| 193 | + "\n", |
| 194 | + "item_collection = await rustac.read(\"items-batched.parquet\")\n", |
| 195 | + "print(\"Read back\", len(item_collection[\"features\"]), \"items\")" |
| 196 | + ] |
| 197 | + }, |
137 | 198 | { |
138 | 199 | "cell_type": "markdown", |
139 | 200 | "id": "2223d4ce", |
|
162 | 223 | }, |
163 | 224 | { |
164 | 225 | "cell_type": "code", |
165 | | - "execution_count": 70, |
| 226 | + "execution_count": 4, |
166 | 227 | "id": "870cbebb", |
167 | 228 | "metadata": {}, |
168 | 229 | "outputs": [ |
169 | 230 | { |
170 | 231 | "name": "stdout", |
171 | 232 | "output_type": "stream", |
172 | 233 | "text": [ |
173 | | - "That took 0.37 seconds to read\n", |
174 | | - "That took 1.20 seconds to write\n", |
| 234 | + "That took 0.08 seconds to read\n", |
| 235 | + "That took 0.28 seconds to write\n", |
175 | 236 | "9999 items have a 'foo' property\n" |
176 | 237 | ] |
177 | 238 | } |
|
219 | 280 | }, |
220 | 281 | { |
221 | 282 | "cell_type": "code", |
222 | | - "execution_count": 71, |
| 283 | + "execution_count": 5, |
223 | 284 | "id": "0fabaa18", |
224 | 285 | "metadata": {}, |
225 | 286 | "outputs": [ |
226 | 287 | { |
227 | 288 | "data": { |
228 | 289 | "text/plain": [ |
229 | | - "┌───────────┬──────────────────────────┬───────────────────────────┐\n", |
230 | | - "│ id │ datetime │ geometry │\n", |
231 | | - "│ varchar │ timestamp with time zone │ geometry │\n", |
232 | | - "├───────────┼──────────────────────────┼───────────────────────────┤\n", |
233 | | - "│ item-0 │ 2023-12-31 17:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
234 | | - "│ item-1 │ 2023-12-31 18:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
235 | | - "│ item-2 │ 2023-12-31 19:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
236 | | - "│ item-3 │ 2023-12-31 20:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
237 | | - "│ item-4 │ 2023-12-31 21:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
238 | | - "│ item-5 │ 2023-12-31 22:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
239 | | - "│ item-6 │ 2023-12-31 23:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
240 | | - "│ item-7 │ 2024-01-01 00:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
241 | | - "│ item-8 │ 2024-01-01 01:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
242 | | - "│ item-9 │ 2024-01-01 02:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
243 | | - "│ · │ · │ · │\n", |
244 | | - "│ · │ · │ · │\n", |
245 | | - "│ · │ · │ · │\n", |
246 | | - "│ item-9990 │ 2025-02-19 23:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
247 | | - "│ item-9991 │ 2025-02-20 00:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
248 | | - "│ item-9992 │ 2025-02-20 01:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
249 | | - "│ item-9993 │ 2025-02-20 02:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
250 | | - "│ item-9994 │ 2025-02-20 03:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
251 | | - "│ item-9995 │ 2025-02-20 04:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
252 | | - "│ item-9996 │ 2025-02-20 05:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
253 | | - "│ item-9997 │ 2025-02-20 06:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
254 | | - "│ item-9998 │ 2025-02-20 07:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
255 | | - "│ item-9999 │ 2025-02-20 08:00:00-07 │ POINT (-105.1019 40.1672) │\n", |
256 | | - "├───────────┴──────────────────────────┴───────────────────────────┤\n", |
257 | | - "│ ? rows (>9999 rows, 20 shown) 3 columns │\n", |
258 | | - "└──────────────────────────────────────────────────────────────────┘" |
| 290 | + "┌───────────┬──────────────────────────┬────────────────────────────────────────────────────────────────────┐\n", |
| 291 | + "│ id │ datetime │ geometry │\n", |
| 292 | + "│ varchar │ timestamp with time zone │ blob │\n", |
| 293 | + "├───────────┼──────────────────────────┼────────────────────────────────────────────────────────────────────┤\n", |
| 294 | + "│ item-0 │ 2023-12-31 17:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 295 | + "│ item-1 │ 2023-12-31 18:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 296 | + "│ item-2 │ 2023-12-31 19:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 297 | + "│ item-3 │ 2023-12-31 20:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 298 | + "│ item-4 │ 2023-12-31 21:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 299 | + "│ item-5 │ 2023-12-31 22:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 300 | + "│ item-6 │ 2023-12-31 23:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 301 | + "│ item-7 │ 2024-01-01 00:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 302 | + "│ item-8 │ 2024-01-01 01:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 303 | + "│ item-9 │ 2024-01-01 02:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 304 | + "│ · │ · │ · │\n", |
| 305 | + "│ · │ · │ · │\n", |
| 306 | + "│ · │ · │ · │\n", |
| 307 | + "│ item-9990 │ 2025-02-19 23:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 308 | + "│ item-9991 │ 2025-02-20 00:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 309 | + "│ item-9992 │ 2025-02-20 01:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 310 | + "│ item-9993 │ 2025-02-20 02:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 311 | + "│ item-9994 │ 2025-02-20 03:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 312 | + "│ item-9995 │ 2025-02-20 04:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 313 | + "│ item-9996 │ 2025-02-20 05:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 314 | + "│ item-9997 │ 2025-02-20 06:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 315 | + "│ item-9998 │ 2025-02-20 07:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 316 | + "│ item-9999 │ 2025-02-20 08:00:00-07 │ \\x01\\x01\\x00\\x00\\x00\\x98\\xDD\\x93\\x87\\x85FZ\\xC0\\x13\\xF2A\\xCFf\\x15D@ │\n", |
| 317 | + "├───────────┴──────────────────────────┴────────────────────────────────────────────────────────────────────┤\n", |
| 318 | + "│ ? rows (>9999 rows, 20 shown) 3 columns │\n", |
| 319 | + "└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘" |
259 | 320 | ] |
260 | 321 | }, |
261 | | - "execution_count": 71, |
| 322 | + "execution_count": 5, |
262 | 323 | "metadata": {}, |
263 | 324 | "output_type": "execute_result" |
264 | 325 | } |
|
282 | 343 | }, |
283 | 344 | { |
284 | 345 | "cell_type": "code", |
285 | | - "execution_count": 72, |
| 346 | + "execution_count": 6, |
286 | 347 | "id": "c01c0ef5", |
287 | 348 | "metadata": {}, |
288 | 349 | "outputs": [ |
|
297 | 358 | "└──────────────┘" |
298 | 359 | ] |
299 | 360 | }, |
300 | | - "execution_count": 72, |
| 361 | + "execution_count": 6, |
301 | 362 | "metadata": {}, |
302 | 363 | "output_type": "execute_result" |
303 | 364 | } |
|
318 | 379 | }, |
319 | 380 | { |
320 | 381 | "cell_type": "code", |
321 | | - "execution_count": 73, |
| 382 | + "execution_count": 7, |
322 | 383 | "id": "18bc3a4b", |
323 | 384 | "metadata": {}, |
324 | 385 | "outputs": [ |
|
327 | 388 | "evalue": "Binder Error: Referenced column \"foo\" not found in FROM clause!\nCandidate bindings: \"bbox\"", |
328 | 389 | "output_type": "error", |
329 | 390 | "traceback": [ |
330 | | - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
331 | | - "\u001b[0;31mBinderException\u001b[0m Traceback (most recent call last)", |
332 | | - "Cell \u001b[0;32mIn[73], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mduckdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mselect id, foo from read_parquet([\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mitems.parquet\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnew-items.parquet\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m])\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", |
333 | | - "\u001b[0;31mBinderException\u001b[0m: Binder Error: Referenced column \"foo\" not found in FROM clause!\nCandidate bindings: \"bbox\"" |
| 391 | + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", |
| 392 | + "\u001b[31mBinderException\u001b[39m Traceback (most recent call last)", |
| 393 | + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mduckdb\u001b[49m\u001b[43m.\u001b[49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mselect id, foo from read_parquet([\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mitems.parquet\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m, \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mnew-items.parquet\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m])\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", |
| 394 | + "\u001b[31mBinderException\u001b[39m: Binder Error: Referenced column \"foo\" not found in FROM clause!\nCandidate bindings: \"bbox\"" |
334 | 395 | ] |
335 | 396 | } |
336 | 397 | ], |
|
341 | 402 | ], |
342 | 403 | "metadata": { |
343 | 404 | "kernelspec": { |
344 | | - "display_name": ".venv", |
| 405 | + "display_name": "rustac-py", |
345 | 406 | "language": "python", |
346 | 407 | "name": "python3" |
347 | 408 | }, |
|
0 commit comments