From 2a66ab3e35230aec17578f748b0b851691adb923 Mon Sep 17 00:00:00 2001
From: Alan Somers
Date: Tue, 1 Apr 2025 15:27:55 -0600
Subject: [PATCH 1/5] More aggressively assert that db_mtx protects db.db_data

db.db_mtx must be held any time that db.db_data is accessed.  All of
these functions already have the lock held by their callers; add
assertions to ensure that it stays that way.

Also, refactor dbuf_read_bonus to make it obvious why db_rwlock isn't
required.

See https://github.com/openzfs/zfs/discussions/17118

Sponsored by: ConnectWise
Signed-off-by: Alan Somers
---
 module/zfs/dbuf.c       | 12 +++++++++---
 module/zfs/dmu.c        |  1 +
 module/zfs/dmu_objset.c |  4 +++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 0a243a24266f..fbd84750d471 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1437,6 +1437,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 static int
 dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn)
 {
+	void* db_data;
 	int bonuslen, max_bonuslen;
 
 	bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
@@ -1444,12 +1445,13 @@ dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn)
 	ASSERT(MUTEX_HELD(&db->db_mtx));
 	ASSERT(DB_DNODE_HELD(db));
 	ASSERT3U(bonuslen, <=, db->db.db_size);
-	db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
+	db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
 	arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
 	if (bonuslen < max_bonuslen)
-		memset(db->db.db_data, 0, max_bonuslen);
+		memset(db_data, 0, max_bonuslen);
 	if (bonuslen)
-		memcpy(db->db.db_data, DN_BONUS(dn->dn_phys), bonuslen);
+		memcpy(db_data, DN_BONUS(dn->dn_phys), bonuslen);
+	db->db.db_data = db_data;
 	db->db_state = DB_CACHED;
 	DTRACE_SET_STATE(db, "bonus buffer filled");
 	return (0);
@@ -1462,6 +1464,7 @@ dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp)
 	uint32_t indbs = 1ULL << dn->dn_indblkshift;
 	int n_bps = indbs >> SPA_BLKPTRSHIFT;
 
+	ASSERT(MUTEX_HELD(&db->db_mtx));
 	for (int i = 0; i < n_bps; i++) {
 		blkptr_t *bp = &bps[i];
 
@@ -2520,6 +2523,7 @@ dbuf_undirty_bonus(dbuf_dirty_record_t *dr)
 {
 	dmu_buf_impl_t *db = dr->dr_dbuf;
 
+	ASSERT(MUTEX_HELD(&db->db_mtx));
 	if (dr->dt.dl.dr_data != db->db.db_data) {
 		struct dnode *dn = dr->dr_dnode;
 		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
@@ -3874,6 +3878,7 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
 		    DBUF_GET_BUFC_TYPE(db), db->db.db_size));
 	}
 
+	ASSERT(MUTEX_HELD(&db->db_mtx));
 	rw_enter(&db->db_rwlock, RW_WRITER);
 	memcpy(db->db.db_data, data->b_data, arc_buf_size(data));
 	rw_exit(&db->db_rwlock);
@@ -3937,6 +3942,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
 
 	if (db->db_buf != NULL) {
 		arc_buf_access(db->db_buf);
+		ASSERT(MUTEX_HELD(&db->db_mtx));
 		ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
 	}
 
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 2b52ae139bac..37c3bd5a8eb5 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1727,6 +1727,7 @@ dmu_object_cached_size(objset_t *os, uint64_t object,
 
 		err = dbuf_read(db, NULL, DB_RF_CANFAIL);
 		if (err == 0) {
+			ASSERT(MUTEX_HELD(&db->db_mtx));
 			dmu_cached_bps(dmu_objset_spa(os), db->db.db_data,
 			    nbps, l1sz, l2sz);
 		}
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index f22a236a6317..4f0ee22b5530 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -2263,8 +2263,10 @@ dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	dbuf_dirty_record_t *dr;
 	void *data;
 
-	if (db->db_dirtycnt == 0)
+	if (db->db_dirtycnt == 0) {
+		ASSERT(MUTEX_HELD(&db->db_mtx));
 		return (db->db.db_data); /* Nothing is changing */
+	}
 
 	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
 

From 7f572ecec895fb0b8e9adc22c02a811a094f75b1 Mon Sep 17 00:00:00 2001
From: Alan Somers
Date: Mon, 7 Apr 2025 14:30:49 -0600
Subject: [PATCH 2/5] Remove a db_mtx assertion in dmu_object_cached_size

Nothing actually locks the mutex here; that is a bug, which I'll fix in
my next PR.

Signed-off-by: Alan Somers
---
 module/zfs/dmu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 37c3bd5a8eb5..2b52ae139bac 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1727,7 +1727,6 @@ dmu_object_cached_size(objset_t *os, uint64_t object,
 
 		err = dbuf_read(db, NULL, DB_RF_CANFAIL);
 		if (err == 0) {
-			ASSERT(MUTEX_HELD(&db->db_mtx));
 			dmu_cached_bps(dmu_objset_spa(os), db->db.db_data,
 			    nbps, l1sz, l2sz);
 		}

From 7151741297552d950b362f7c3601cee6b0567d9f Mon Sep 17 00:00:00 2001
From: Alan Somers
Date: Wed, 30 Apr 2025 15:43:15 -0600
Subject: [PATCH 3/5] Refactor dbuf_hold_copy to eliminate the db_rwlock

Copy data into the newly allocated buffer before assigning it to the db.
That way, there is no need to take db->db_rwlock.

Signed-off-by: Alan Somers
---
 module/zfs/dbuf.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index fbd84750d471..557494338f14 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -3855,6 +3855,7 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
 {
 	dbuf_dirty_record_t *dr = db->db_data_pending;
 	arc_buf_t *data = dr->dt.dl.dr_data;
+	arc_buf_t *db_data;
 	enum zio_compress compress_type = arc_get_compression(data);
 	uint8_t complevel = arc_get_complevel(data);
 
@@ -3865,23 +3866,22 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
 		uint8_t mac[ZIO_DATA_MAC_LEN];
 
 		arc_get_raw_params(data, &byteorder, salt, iv, mac);
-		dbuf_set_data(db, arc_alloc_raw_buf(dn->dn_objset->os_spa, db,
+		db_data = arc_alloc_raw_buf(dn->dn_objset->os_spa, db,
 		    dmu_objset_id(dn->dn_objset), byteorder, salt, iv, mac,
 		    dn->dn_type, arc_buf_size(data), arc_buf_lsize(data),
-		    compress_type, complevel));
+		    compress_type, complevel);
 	} else if (compress_type != ZIO_COMPRESS_OFF) {
-		dbuf_set_data(db, arc_alloc_compressed_buf(
+		db_data = arc_alloc_compressed_buf(
 		    dn->dn_objset->os_spa, db, arc_buf_size(data),
-		    arc_buf_lsize(data), compress_type, complevel));
+		    arc_buf_lsize(data), compress_type, complevel);
 	} else {
-		dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db,
-		    DBUF_GET_BUFC_TYPE(db), db->db.db_size));
+		db_data = arc_alloc_buf(dn->dn_objset->os_spa, db,
+		    DBUF_GET_BUFC_TYPE(db), db->db.db_size);
 	}
+	memcpy(db_data->b_data, data->b_data, arc_buf_size(data));
 
 	ASSERT(MUTEX_HELD(&db->db_mtx));
-	rw_enter(&db->db_rwlock, RW_WRITER);
-	memcpy(db->db.db_data, data->b_data, arc_buf_size(data));
-	rw_exit(&db->db_rwlock);
+	dbuf_set_data(db, db_data);
 }

From 37b48b9dc74d48e3e4b565ca1d2ddbe70cc963f9 Mon Sep 17 00:00:00 2001
From: Alan Somers
Date: Wed, 30 Apr 2025 16:48:52 -0600
Subject: [PATCH 4/5] Refactor dbuf_read_hole

In the case of an indirect hole, initialize the newly allocated buffer
before assigning it to the dmu_buf_impl_t.

Signed-off-by: Alan Somers
---
 module/zfs/dbuf.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 557494338f14..98bc40271f33 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1458,13 +1458,12 @@ dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn)
 }
 
 static void
-dbuf_handle_indirect_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *dbbp)
+dbuf_handle_indirect_hole(arc_buf_t *db_data, dnode_t *dn, blkptr_t *dbbp)
 {
-	blkptr_t *bps = db->db.db_data;
+	blkptr_t *bps = (blkptr_t*)db_data;
 	uint32_t indbs = 1ULL << dn->dn_indblkshift;
 	int n_bps = indbs >> SPA_BLKPTRSHIFT;
 
-	ASSERT(MUTEX_HELD(&db->db_mtx));
 	for (int i = 0; i < n_bps; i++) {
 		blkptr_t *bp = &bps[i];
 
@@ -1486,6 +1485,7 @@ static int
 dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
 {
 	ASSERT(MUTEX_HELD(&db->db_mtx));
+	arc_buf_t *db_data;
 	int is_hole = bp == NULL || BP_IS_HOLE(bp);
 
 	/*
@@ -1498,13 +1498,14 @@ dbuf_read_hole(dmu_buf_impl_t *db, dnode_t *dn, blkptr_t *bp)
 		is_hole = dnode_block_freed(dn, db->db_blkid) || BP_IS_HOLE(bp);
 
 	if (is_hole) {
-		dbuf_set_data(db, dbuf_alloc_arcbuf(db));
-		memset(db->db.db_data, 0, db->db.db_size);
+		db_data = dbuf_alloc_arcbuf(db);
+		memset(db_data->b_data, 0, db->db.db_size);
 
 		if (bp != NULL && db->db_level > 0 && BP_IS_HOLE(bp) &&
 		    BP_GET_LOGICAL_BIRTH(bp) != 0) {
-			dbuf_handle_indirect_hole(db, dn, bp);
+			dbuf_handle_indirect_hole(db_data->b_data, dn, bp);
 		}
+		dbuf_set_data(db, db_data);
 		db->db_state = DB_CACHED;
 		DTRACE_SET_STATE(db, "hole read satisfied");
 		return (0);

From 3460c6ce50ad596e39169976c52b131526d1f14a Mon Sep 17 00:00:00 2001
From: Alan Somers
Date: Thu, 1 May 2025 10:56:02 -0600
Subject: [PATCH 5/5] Apply @amotin's suggestions

Signed-off-by: Alan Somers
---
 module/zfs/dbuf.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 98bc40271f33..a5200b8a8665 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1458,9 +1458,9 @@ dbuf_read_bonus(dmu_buf_impl_t *db, dnode_t *dn)
 }
 
 static void
-dbuf_handle_indirect_hole(arc_buf_t *db_data, dnode_t *dn, blkptr_t *dbbp)
+dbuf_handle_indirect_hole(void *data, dnode_t *dn, blkptr_t *dbbp)
 {
-	blkptr_t *bps = (blkptr_t*)db_data;
+	blkptr_t *bps = data;
 	uint32_t indbs = 1ULL << dn->dn_indblkshift;
 	int n_bps = indbs >> SPA_BLKPTRSHIFT;
 
@@ -3881,7 +3881,6 @@ dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db)
 	}
 	memcpy(db_data->b_data, data->b_data, arc_buf_size(data));
 
-	ASSERT(MUTEX_HELD(&db->db_mtx));
 	dbuf_set_data(db, db_data);
 }
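
Note (editorial, not part of the series): patches 3-5 converge on one pattern -- allocate a new buffer, initialize it completely, and only then publish it through the mutex-protected db.db_data pointer, so readers that honor db_mtx never see a partially filled buffer and no db_rwlock round-trip is needed. Below is a minimal userspace sketch of that pattern; the names dbuf_model and dbuf_model_fill are hypothetical illustrations, not OpenZFS APIs.

/*
 * Illustrative sketch only, not OpenZFS code: a mutex (standing in for
 * db->db_mtx) protects a shared data pointer (standing in for
 * db->db.db_data).  The buffer is fully initialized before it is
 * published under the mutex, so no separate rwlock is required.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dbuf_model {
	pthread_mutex_t	mtx;	/* models db->db_mtx */
	void		*data;	/* models db->db.db_data */
	size_t		size;
};

/* Caller must hold m->mtx, mirroring the assertions added in patch 1. */
static void
dbuf_model_fill(struct dbuf_model *m)
{
	void *data = malloc(m->size);

	if (data == NULL)
		abort();
	memset(data, 0, m->size);	/* initialize before publishing */
	m->data = data;			/* publish while the mutex is held */
}

int
main(void)
{
	struct dbuf_model m = { .data = NULL, .size = 4096 };

	pthread_mutex_init(&m.mtx, NULL);
	pthread_mutex_lock(&m.mtx);
	dbuf_model_fill(&m);
	pthread_mutex_unlock(&m.mtx);

	printf("buffer of %zu bytes published and zeroed\n", m.size);
	free(m.data);
	pthread_mutex_destroy(&m.mtx);
	return (0);
}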