ddt: rework ops interface in terms of keys and values

Store objects store keys and values, so have them take those types and
nothing more. This way, they don't need to be concerned about the "kind"
of entry being operated on; the dispatch layer can take care of the
appropriate conversions.

This adds a "contains" op to see if a particular entry exists without
loading it, which makes a couple of things easier to do; in particular,
it allows us to avoid an allocation in ddt_class_contains().

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Closes #15887
This commit is contained in:
Rob Norris 2023-07-03 23:28:46 +10:00 committed by Brian Behlendorf
parent 5ee0f9c649
commit 9029278dde
4 changed files with 78 additions and 52 deletions

View File

@@ -41,15 +41,19 @@ typedef struct {
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
int (*ddt_op_lookup)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
int (*ddt_op_contains)(objset_t *os, uint64_t object,
const ddt_key_t *ddk);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
ddt_entry_t *dde);
int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
const ddt_key_t *ddk);
int (*ddt_op_update)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, const ddt_phys_t *phys, size_t psize,
dmu_tx_t *tx);
int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
uint64_t *walk);
int (*ddt_op_remove)(objset_t *os, uint64_t object,
const ddt_key_t *ddk, dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
ddt_key_t *ddk, ddt_phys_t *phys, size_t psize);
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
} ddt_ops_t;
@@ -62,7 +66,7 @@ extern void ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg);
* outside of the DDT implementation proper, and if you do, consider moving
* them up.
*/
#define DDT_NAMELEN 107
#define DDT_NAMELEN 110
extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);

View File

@@ -421,6 +421,7 @@ CFLAGS.gcc+= -Wno-pointer-to-int-cast
CFLAGS.abd.c= -Wno-cast-qual
CFLAGS.ddt.c= -Wno-cast-qual
CFLAGS.ddt_zap.c= -Wno-cast-qual
CFLAGS.dmu.c= -Wno-cast-qual
CFLAGS.dmu_traverse.c= -Wno-cast-qual
CFLAGS.dnode.c= ${NO_WUNUSED_BUT_SET_VARIABLE}

View File

@@ -186,18 +186,30 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
return (SET_ERROR(ENOENT));
return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
ddt->ddt_object[type][class], dde));
ddt->ddt_object[type][class], &dde->dde_key,
dde->dde_phys, sizeof (dde->dde_phys)));
}
/*
 * Test whether the store object for (type, class) holds an entry for the
 * given key. Only the key is passed down; no value buffer is supplied, so
 * this can be used to check for presence without loading the entry.
 *
 * Returns ENOENT if no object exists for this type/class; otherwise returns
 * the result of the type's ddt_op_contains handler. Callers in this file
 * treat 0 as "present".
 */
static int
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
const ddt_key_t *ddk)
{
/* No backing object for this type/class means nothing can be stored. */
if (!ddt_object_exists(ddt, type, class))
return (SET_ERROR(ENOENT));
/* Dispatch to the per-type store implementation. */
return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os,
ddt->ddt_object[type][class], ddk));
}
static void
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde)
const ddt_key_t *ddk)
{
if (!ddt_object_exists(ddt, type, class))
return;
ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
ddt->ddt_object[type][class], dde);
ddt->ddt_object[type][class], ddk);
}
static int
@@ -207,17 +219,18 @@ ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
ddt->ddt_object[type][class], dde, tx));
ddt->ddt_object[type][class], &dde->dde_key, dde->dde_phys,
sizeof (dde->dde_phys), tx));
}
static int
ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ddt_entry_t *dde, dmu_tx_t *tx)
const ddt_key_t *ddk, dmu_tx_t *tx)
{
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,
ddt->ddt_object[type][class], dde, tx));
ddt->ddt_object[type][class], ddk, tx));
}
int
@@ -227,7 +240,8 @@ ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
ASSERT(ddt_object_exists(ddt, type, class));
return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
ddt->ddt_object[type][class], dde, walk));
ddt->ddt_object[type][class], walk, &dde->dde_key,
dde->dde_phys, sizeof (dde->dde_phys)));
}
int
@@ -523,7 +537,7 @@ void
ddt_prefetch(spa_t *spa, const blkptr_t *bp)
{
ddt_t *ddt;
ddt_entry_t dde;
ddt_key_t ddk;
if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp))
return;
@@ -534,11 +548,11 @@ ddt_prefetch(spa_t *spa, const blkptr_t *bp)
* Thus no locking is required as the DDT can't disappear on us.
*/
ddt = ddt_select(spa, bp);
ddt_key_fill(&dde.dde_key, bp);
ddt_key_fill(&ddk, bp);
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
ddt_object_prefetch(ddt, type, class, &dde);
ddt_object_prefetch(ddt, type, class, &ddk);
}
}
}
@@ -660,7 +674,7 @@ boolean_t
ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp)
{
ddt_t *ddt;
ddt_entry_t *dde;
ddt_key_t ddk;
if (!BP_GET_DEDUP(bp))
return (B_FALSE);
@@ -669,20 +683,16 @@ ddt_class_contains(spa_t *spa, ddt_class_t max_class, const blkptr_t *bp)
return (B_TRUE);
ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)];
dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP);
ddt_key_fill(&(dde->dde_key), bp);
ddt_key_fill(&ddk, bp);
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class <= max_class; class++) {
if (ddt_object_lookup(ddt, type, class, dde) == 0) {
kmem_cache_free(ddt_entry_cache, dde);
if (ddt_object_contains(ddt, type, class, &ddk) == 0)
return (B_TRUE);
}
}
}
kmem_cache_free(ddt_entry_cache, dde);
return (B_FALSE);
}
@@ -833,9 +843,9 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
if (otype != DDT_TYPES &&
(otype != ntype || oclass != nclass || total_refcnt == 0)) {
VERIFY0(ddt_object_remove(ddt, otype, oclass, dde, tx));
VERIFY0(ddt_object_remove(ddt, otype, oclass, ddk, tx));
ASSERT3U(
ddt_object_lookup(ddt, otype, oclass, dde), ==, ENOENT);
ddt_object_contains(ddt, otype, oclass, ddk), ==, ENOENT);
}
if (total_refcnt != 0) {

View File

@@ -42,7 +42,7 @@ static unsigned int ddt_zap_default_ibs = 15;
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
static size_t
ddt_zap_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
{
uchar_t *version = dst++;
int cpfunc = ZIO_COMPRESS_ZLE;
@@ -51,7 +51,8 @@ ddt_zap_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */
c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level);
c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1,
ci->ci_level);
if (c_len == s_len) {
cpfunc = ZIO_COMPRESS_OFF;
@@ -93,8 +94,10 @@ ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
ddt_zap_default_bs, ddt_zap_default_ibs,
DMU_OT_NONE, 0, tx);
if (*objectp == 0)
return (SET_ERROR(ENOTSUP));
return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
return (0);
}
static int
@@ -104,51 +107,57 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
}
static int
ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
ddt_zap_lookup(objset_t *os, uint64_t object,
const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize)
{
uchar_t *cbuf;
uint64_t one, csize;
int error;
error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
error = zap_length_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, &one, &csize);
if (error)
return (error);
ASSERT3U(one, ==, 1);
ASSERT3U(csize, <=, (sizeof (dde->dde_phys) + 1));
ASSERT3U(csize, <=, psize + 1);
cbuf = kmem_alloc(csize, KM_SLEEP);
error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key,
error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf);
if (error == 0)
ddt_zap_decompress(cbuf, dde->dde_phys, csize,
sizeof (dde->dde_phys));
ddt_zap_decompress(cbuf, phys, csize, psize);
kmem_free(cbuf, csize);
return (error);
}
static void
ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
static int
ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
{
(void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS);
return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,
NULL, NULL));
}
/*
 * Issue a ZAP prefetch for the entry stored under ddk in the given object.
 * The key is handed to the ZAP layer as an array of DDT_KEY_WORDS 64-bit
 * words. Nothing is returned to the caller; the result of
 * zap_prefetch_uint64() is explicitly discarded.
 */
static void
ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
{
/* The cast drops the const qualifier from ddk as well as retyping it. */
(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
}
static int
ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx)
{
const size_t cbuf_size = sizeof (dde->dde_phys) + 1;
const size_t cbuf_size = psize + 1;
uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);
uint64_t csize = ddt_zap_compress(dde->dde_phys, cbuf,
sizeof (dde->dde_phys), cbuf_size);
uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
int error = zap_update_uint64(os, object, (uint64_t *)&dde->dde_key,
int error = zap_update_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, 1, csize, cbuf, tx);
kmem_free(cbuf, cbuf_size);
@@ -157,14 +166,16 @@ ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
}
static int
ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
dmu_tx_t *tx)
{
return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key,
return (zap_remove_uint64(os, object, (uint64_t *)ddk,
DDT_KEY_WORDS, tx));
}
static int
ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
ddt_phys_t *phys, size_t psize)
{
zap_cursor_t zc;
zap_attribute_t za;
@@ -186,7 +197,7 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
uint64_t csize = za.za_num_integers;
ASSERT3U(za.za_integer_length, ==, 1);
ASSERT3U(csize, <=, sizeof (dde->dde_phys) + 1);
ASSERT3U(csize, <=, psize + 1);
uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
@@ -194,9 +205,8 @@ ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
DDT_KEY_WORDS, 1, csize, cbuf);
ASSERT0(error);
if (error == 0) {
ddt_zap_decompress(cbuf, dde->dde_phys, csize,
sizeof (dde->dde_phys));
dde->dde_key = *(ddt_key_t *)za.za_name;
ddt_zap_decompress(cbuf, phys, csize, psize);
*ddk = *(ddt_key_t *)za.za_name;
}
kmem_free(cbuf, csize);
@@ -219,6 +229,7 @@ const ddt_ops_t ddt_zap_ops = {
ddt_zap_create,
ddt_zap_destroy,
ddt_zap_lookup,
ddt_zap_contains,
ddt_zap_prefetch,
ddt_zap_update,
ddt_zap_remove,