Merged
Commits: 27 (changes shown from 25 commits)
9a04ce5
Add support for 64 bit chunks, including 64 bit chunk dimensions
fortnern Nov 2, 2025
8620437
Committing clang-format changes
github-actions[bot] Nov 2, 2025
c7ad5ef
Fix extra semicolon and promote single chunk index to layout version …
fortnern Nov 2, 2025
c7e2ff1
Fix uninitialized warnings.
fortnern Nov 2, 2025
286c044
Committing clang-format changes
github-actions[bot] Nov 2, 2025
e29bde0
Spelling
fortnern Nov 2, 2025
3aea6bf
Merge branch '64bit_chunks' of github.com:fortnern/hdf5 into 64bit_ch…
fortnern Nov 2, 2025
9c0090a
Fix failures in big.c for systems that don't support sparse files.
fortnern Nov 2, 2025
5c3bd1b
Committing clang-format changes
github-actions[bot] Nov 2, 2025
17262b8
Hopefully fix issues on 32 bit systems
fortnern Nov 3, 2025
36dfed8
Committing clang-format changes
github-actions[bot] Nov 3, 2025
0c096e6
More 32 bit fixes
fortnern Nov 3, 2025
f4b745b
Merge branch '64bit_chunks' of github.com:fortnern/hdf5 into 64bit_ch…
fortnern Nov 3, 2025
00c2872
Committing clang-format changes
github-actions[bot] Nov 3, 2025
d9ffdde
More 32 bit fixes and formatting
fortnern Nov 3, 2025
0e2a416
Fix warning
fortnern Nov 3, 2025
faf43e9
Committing clang-format changes
github-actions[bot] Nov 3, 2025
4f7c372
Update reference manual and changelog
fortnern Nov 3, 2025
cf77b97
Merge branch '64bit_chunks' of github.com:fortnern/hdf5 into 64bit_ch…
fortnern Nov 3, 2025
fa66fae
Merge branch 'develop' into 64bit_chunks
lrknox Nov 3, 2025
9472ecd
Add chunk encoding size check to H5D__chunk_allocate
fortnern Nov 3, 2025
9e8e0fa
Merge branch '64bit_chunks' of github.com:fortnern/hdf5 into 64bit_ch…
fortnern Nov 3, 2025
a4b8972
Reword changelog note and reference manual entry for H5Pset_chunk()
fortnern Nov 3, 2025
07c1aea
Remove (revert) redundant chunk encoding size check
fortnern Nov 4, 2025
fdad5cd
Add check for type conversion buffer size overflowing hsize_t to
fortnern Nov 4, 2025
204b0f9
Remove another redundant chunk size check and add an H5_UNLIKELY to an
fortnern Nov 4, 2025
8c54408
Rename open parameter for layout init callback to open_op, add
fortnern Nov 4, 2025
2 changes: 1 addition & 1 deletion config/gnu-warnings/4.8
@@ -1,5 +1,5 @@
# warning flags added for GCC >= 4.3
-Wlarger-than=2560
Member Author:
Increasing these by 24 bytes to make room for the slightly larger H5O_layout_t, since uint32_t fields changed to hsize_t.
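A minimal sketch of the effect (not the real H5O_layout_t; the rank and field names are made up for illustration): widening per-dimension fields from uint32_t to the 64-bit hsize_t grows the struct, which is why the -Wlarger-than threshold moves from 2560 to 2584.

```c
/* Toy illustration only: not the actual H5O_layout_t, and NDIMS is made up. */
#include <stdint.h>
#include <stdio.h>

#define NDIMS 3 /* hypothetical rank, for illustration */

struct layout_old { uint32_t dim[NDIMS]; uint32_t chunk_size; }; /* 32-bit fields */
struct layout_new { uint64_t dim[NDIMS]; uint64_t chunk_size; }; /* widened to 64-bit */

int main(void)
{
    /* The new struct is larger purely because the field types were widened. */
    printf("old: %zu bytes, new: %zu bytes\n",
           sizeof(struct layout_old), sizeof(struct layout_new));
    return 0;
}
```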

-Wlarger-than=2584
-Wlogical-op

# warning flags added for GCC >= 4.4
2 changes: 1 addition & 1 deletion config/gnu-warnings/cxx-4.8
@@ -1,5 +1,5 @@
# warning flags added for GCC >= 4.3
-Wlarger-than=2560
-Wlarger-than=2584
-Wlogical-op

# warning flags added for GCC >= 4.4
4 changes: 4 additions & 0 deletions release_docs/CHANGELOG.md
@@ -193,6 +193,10 @@ All other HDF5 library CMake options are prefixed with `HDF5_`

## Library

### Added support for large chunks

The library now supports chunks larger than 4 GiB using 64-bit addressing. Creating chunks of 4 GiB or larger upgrades the file format and prevents the dataset from being opened by earlier versions of the library. 32-bit systems cannot use such chunks in all circumstances, for example with data filters or a fill value.
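A hedged sketch of creating such a dataset, assuming a 1-D dataset of single-byte elements stored in one 5 GiB chunk; the file and dataset names are illustrative and error checking is omitted for brevity.

```c
#include "hdf5.h"

int main(void)
{
    hsize_t dims[1]  = {5ULL * 1024 * 1024 * 1024}; /* 5 Gi one-byte elements */
    hsize_t chunk[1] = {5ULL * 1024 * 1024 * 1024}; /* one chunk larger than 4 GiB */

    hid_t file  = H5Fcreate("big_chunk.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
    hid_t space = H5Screate_simple(1, dims, NULL);
    hid_t dcpl  = H5Pcreate(H5P_DATASET_CREATE);

    /* Previously, chunks had to stay below 4 GiB and this call would fail. */
    H5Pset_chunk(dcpl, 1, chunk);

    hid_t dset = H5Dcreate2(file, "data", H5T_NATIVE_UCHAR, space,
                            H5P_DEFAULT, dcpl, H5P_DEFAULT);

    H5Dclose(dset);
    H5Pclose(dcpl);
    H5Sclose(space);
    H5Fclose(file);
    return 0;
}
```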

### Changed default chunk cache hash table size to 8191

In order to reduce hash collisions and take advantage of modern memory capacity, the default hash table size for the chunk cache has been increased from 521 to 8191. This means the hash table will consume approximately 64 KiB per open dataset. This value can be changed with `H5Pset_cache()` or `H5Pset_chunk_cache()`. This value was chosen because it is a prime number close to 8K.
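A hedged sketch of overriding the new default through a dataset access property list; the 521-slot and 1 MiB values below are illustrative, not recommendations.

```c
#include "hdf5.h"

hid_t make_dapl_with_old_defaults(void)
{
    hid_t dapl = H5Pcreate(H5P_DATASET_ACCESS);

    /* 521 slots (the previous default), 1 MiB cache, default preemption policy */
    H5Pset_chunk_cache(dapl, 521, 1024 * 1024, H5D_CHUNK_CACHE_W0_DEFAULT);

    return dapl;
}
```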
14 changes: 4 additions & 10 deletions src/H5D.c
@@ -1491,10 +1491,9 @@ herr_t
H5Dwrite_chunk(hid_t dset_id, hid_t dxpl_id, uint32_t filters, const hsize_t *offset, size_t data_size,
const void *buf)
{
H5VL_object_t *vol_obj; /* Dataset for this operation */
H5VL_optional_args_t vol_cb_args; /* Arguments to VOL callback */
H5VL_native_dataset_optional_args_t dset_opt_args; /* Arguments for optional operation */
uint32_t data_size_32; /* Chunk data size (limited to 32-bits currently) */
H5VL_object_t *vol_obj; /* Dataset for this operation */
H5VL_optional_args_t vol_cb_args; /* Arguments to VOL callback */
H5VL_native_dataset_optional_args_t dset_opt_args; /* Arguments for optional operation */
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_API(FAIL)
@@ -1509,11 +1508,6 @@ H5Dwrite_chunk(hid_t dset_id, hid_t dxpl_id, uint32_t filters, const hsize_t *of
if (0 == data_size)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "data_size cannot be zero");

/* Make sure data size is less than 4 GiB */
data_size_32 = (uint32_t)data_size;
if (data_size != (size_t)data_size_32)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid data_size - chunks cannot be > 4 GiB");

/* Get the default dataset transfer property list if the user didn't provide one */
if (H5P_DEFAULT == dxpl_id)
dxpl_id = H5P_DATASET_XFER_DEFAULT;
@@ -1523,7 +1517,7 @@ H5Dwrite_chunk(hid_t dset_id, hid_t dxpl_id, uint32_t filters, const hsize_t *of
/* Set up VOL callback arguments */
dset_opt_args.chunk_write.offset = offset;
dset_opt_args.chunk_write.filters = filters;
dset_opt_args.chunk_write.size = data_size_32;
dset_opt_args.chunk_write.size = data_size;
dset_opt_args.chunk_write.buf = buf;
vol_cb_args.op_type = H5VL_NATIVE_DATASET_CHUNK_WRITE;
vol_cb_args.args = &dset_opt_args;
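The hunk above removes the narrowing of data_size to uint32_t, so H5Dwrite_chunk() no longer rejects raw chunks of 4 GiB or more. A hedged sketch of a caller, assuming a 1-D dataset and an unfiltered chunk buffer prepared elsewhere:

```c
#include "hdf5.h"
#include <stdint.h>

herr_t write_one_raw_chunk(hid_t dset_id, const void *chunk_buf, size_t chunk_bytes)
{
    hsize_t  offset[1] = {0}; /* logical offset of the chunk, in element coordinates */
    uint32_t filters   = 0;   /* no filters applied to this raw chunk */

    /* chunk_bytes is a size_t and may now exceed 4 GiB on 64-bit systems */
    return H5Dwrite_chunk(dset_id, H5P_DEFAULT, filters, offset, chunk_bytes, chunk_buf);
}
```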
10 changes: 4 additions & 6 deletions src/H5Dbtree.c
@@ -1072,16 +1072,14 @@ H5D__btree_idx_iterate_cb(H5F_t H5_ATTR_UNUSED *f, const void *_lt_key, haddr_t
FUNC_ENTER_PACKAGE_NOERR

/* Sanity check for memcpy() */
HDcompile_assert(offsetof(H5D_chunk_rec_t, nbytes) == offsetof(H5D_btree_key_t, nbytes));
HDcompile_assert(sizeof(chunk_rec.nbytes) == sizeof(lt_key->nbytes));
HDcompile_assert(offsetof(H5D_chunk_rec_t, scaled) == offsetof(H5D_btree_key_t, scaled));
HDcompile_assert(sizeof(chunk_rec.scaled) == sizeof(lt_key->scaled));
HDcompile_assert(offsetof(H5D_chunk_rec_t, filter_mask) == offsetof(H5D_btree_key_t, filter_mask));
HDcompile_assert(sizeof(chunk_rec.filter_mask) == sizeof(lt_key->filter_mask));

/* Compose generic chunk record for callback */
H5MM_memcpy(&chunk_rec, lt_key, sizeof(*lt_key));
chunk_rec.chunk_addr = addr;
H5MM_memcpy(&(chunk_rec.scaled), &(lt_key->scaled), sizeof(lt_key->scaled));
chunk_rec.nbytes = (hsize_t)lt_key->nbytes;
chunk_rec.filter_mask = (uint32_t)lt_key->filter_mask;
chunk_rec.chunk_addr = addr;

/* Make "generic chunk" callback */
if ((ret_value = (udata->cb)(&chunk_rec, udata->udata)) < 0)
31 changes: 15 additions & 16 deletions src/H5Dbtree2.c
@@ -67,19 +67,19 @@
/* User data for creating callback context */
typedef struct H5D_bt2_ctx_ud_t {
const H5F_t *f; /* Pointer to file info */
uint32_t chunk_size; /* Size of chunk (bytes; for filtered object) */
hsize_t chunk_size; /* Size of chunk (bytes; for filtered object) */
unsigned ndims; /* Number of dimensions */
size_t chunk_size_len; /* Size of chunk sizes in the file (bytes) */
uint32_t *dim; /* Size of chunk in elements */
hsize_t *dim; /* Size of chunk in elements */
} H5D_bt2_ctx_ud_t;

/* The callback context */
typedef struct H5D_bt2_ctx_t {
uint32_t chunk_size; /* Size of chunk (bytes; constant for unfiltered object) */
size_t sizeof_addr; /* Size of file addresses in the file (bytes) */
size_t chunk_size_len; /* Size of chunk sizes in the file (bytes) */
unsigned ndims; /* Number of dimensions in chunk */
uint32_t *dim; /* Size of chunk in elements */
hsize_t chunk_size; /* Size of chunk (bytes; constant for unfiltered object) */
size_t sizeof_addr; /* Size of file addresses in the file (bytes) */
size_t chunk_size_len; /* Size of chunk sizes in the file (bytes) */
unsigned ndims; /* Number of dimensions in chunk */
hsize_t *dim; /* Size of chunk in elements */
} H5D_bt2_ctx_t;

/* Callback info for iteration over chunks in v2 B-tree */
@@ -228,7 +228,7 @@ const H5B2_class_t H5D_BT2_FILT[1] = {{
H5FL_DEFINE_STATIC(H5D_bt2_ctx_t);

/* Declare a free list to manage the page elements */
H5FL_ARR_DEFINE_STATIC(uint32_t, H5O_LAYOUT_NDIMS);
H5FL_ARR_DEFINE_STATIC(hsize_t, H5O_LAYOUT_NDIMS);

/*-------------------------------------------------------------------------
* Function: H5D__bt2_crt_context
@@ -245,7 +245,7 @@
{
H5D_bt2_ctx_ud_t *udata = (H5D_bt2_ctx_ud_t *)_udata; /* User data for building callback context */
H5D_bt2_ctx_t *ctx; /* Callback context structure */
uint32_t *my_dim = NULL; /* Pointer to copy of chunk dimension size */
hsize_t *my_dim = NULL; /* Pointer to copy of chunk dimension size */
void *ret_value = NULL; /* Return value */

FUNC_ENTER_PACKAGE
@@ -267,9 +267,9 @@
ctx->chunk_size_len = udata->chunk_size_len;

/* Set up the "local" information for this dataset's chunk dimension sizes */
if (NULL == (my_dim = (uint32_t *)H5FL_ARR_MALLOC(uint32_t, H5O_LAYOUT_NDIMS)))
if (NULL == (my_dim = (hsize_t *)H5FL_ARR_MALLOC(hsize_t, H5O_LAYOUT_NDIMS)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, NULL, "can't allocate chunk dims");
H5MM_memcpy(my_dim, udata->dim, H5O_LAYOUT_NDIMS * sizeof(uint32_t));
H5MM_memcpy(my_dim, udata->dim, H5O_LAYOUT_NDIMS * sizeof(hsize_t));
ctx->dim = my_dim;

/* Set return value */
@@ -301,7 +301,7 @@ H5D__bt2_dst_context(void *_ctx)

/* Free array for chunk dimension sizes */
if (ctx->dim)
H5FL_ARR_FREE(uint32_t, ctx->dim);
H5FL_ARR_FREE(hsize_t, ctx->dim);
/* Release callback context */
ctx = H5FL_FREE(H5D_bt2_ctx_t, ctx);

@@ -561,7 +561,7 @@ H5D__bt2_filt_debug(FILE *stream, int indent, int fwidth, const void *_record, c
assert(0 != record->nbytes);

fprintf(stream, "%*s%-*s %" PRIuHADDR "\n", indent, "", fwidth, "Chunk address:", record->chunk_addr);
fprintf(stream, "%*s%-*s %u bytes\n", indent, "", fwidth, "Chunk size:", (unsigned)record->nbytes);
fprintf(stream, "%*s%-*s %" PRIuHSIZE " bytes\n", indent, "", fwidth, "Chunk size:", record->nbytes);
fprintf(stream, "%*s%-*s 0x%08x\n", indent, "", fwidth, "Filter mask:", record->filter_mask);

fprintf(stream, "%*s%-*s {", indent, "", fwidth, "Logical offset:");
@@ -953,7 +953,7 @@ H5D__bt2_idx_insert(const H5D_chk_idx_info_t *idx_info, H5D_chunk_ud_t *udata,
bt2_udata.ndims = idx_info->layout->u.chunk.ndims - 1;
bt2_udata.rec.chunk_addr = udata->chunk_block.offset;
if (idx_info->pline->nused > 0) { /* filtered chunk */
H5_CHECKED_ASSIGN(bt2_udata.rec.nbytes, uint32_t, udata->chunk_block.length, hsize_t);
bt2_udata.rec.nbytes = udata->chunk_block.length;
bt2_udata.rec.filter_mask = udata->filter_mask;
} /* end if */
else { /* non-filtered chunk */
@@ -1237,8 +1237,7 @@ H5D__bt2_remove_cb(const void *_record, void *_udata)
assert(f);

/* Free the space in the file for the object being removed */
H5_CHECK_OVERFLOW(record->nbytes, uint32_t, hsize_t);
if (H5MF_xfree(f, H5FD_MEM_DRAW, record->chunk_addr, (hsize_t)record->nbytes) < 0)
if (H5MF_xfree(f, H5FD_MEM_DRAW, record->chunk_addr, record->nbytes) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "unable to free chunk");

done: