RDMA: Extend RDMA kernel verbs ABI to support flush

This commit extends the RDMA kernel verbs ABI to support the flush
operation defined in IBA A19.4.1. These changes are
backward compatible with the existing RDMA kernel verbs ABI.

It adds new FLUSH attributes/capabilities to the device/HCA, and new
FLUSH access flags to the memory region.

Users can pass the flush access flags to ibv_reg_mr(3) when registering
a memory region; registration succeeds only for access flags that are
also present in the device's capabilities (see the userspace sketch
below).

A successfully registered MR is therefore flushable. Like the
device/HCA, a flushable MR carries one or both of the GLOBAL_VISIBILITY
and PERSISTENT attributes/capabilities.
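
For illustration, the intended userspace flow looks roughly like the
following minimal sketch against libibverbs. IBV_ACCESS_FLUSH_GLOBAL and
IBV_ACCESS_FLUSH_PERSISTENT are assumed here to be the rdma-core
counterparts of the kernel flags added below; whether they are available
depends on the installed rdma-core headers.

#include <infiniband/verbs.h>
#include <stdio.h>

/* Sketch: register a flushable MR. Registration is expected to fail
 * when the requested flush access flags exceed the device's
 * capabilities, mirroring ib_check_mr_access() in this commit. */
static struct ibv_mr *reg_flushable_mr(struct ibv_pd *pd, void *buf,
				       size_t len)
{
	struct ibv_mr *mr;
	int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE |
		     IBV_ACCESS_FLUSH_PERSISTENT; /* assumed flag name */

	mr = ibv_reg_mr(pd, buf, len, access);
	if (!mr)
		perror("ibv_reg_mr"); /* e.g. EOPNOTSUPP without support */
	return mr;
}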

Link: https://lore.kernel.org/r/20221206130201.30986-3-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Li Zhijian 2022-12-06 21:01:53 +08:00 committed by Jason Gunthorpe
parent 0c17da492d
commit 208e3a134b
2 changed files with 20 additions and 1 deletion

diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h

@@ -84,6 +84,7 @@ enum {
 	/* opcode 0x15 is reserved */
 	IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16,
 	IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17,
+	IB_OPCODE_FLUSH = 0x1C,
 	IB_OPCODE_ATOMIC_WRITE = 0x1D,

 	/* real constants follow -- see comment about above IB_OPCODE()
@@ -113,6 +114,7 @@ enum {
 	IB_OPCODE(RC, FETCH_ADD),
 	IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
 	IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+	IB_OPCODE(RC, FLUSH),
 	IB_OPCODE(RC, ATOMIC_WRITE),

 	/* UC */
@@ -151,6 +153,7 @@ enum {
 	IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
 	IB_OPCODE(RD, COMPARE_SWAP),
 	IB_OPCODE(RD, FETCH_ADD),
+	IB_OPCODE(RD, FLUSH),

 	/* UD */
 	IB_OPCODE(UD, SEND_ONLY),
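
To see what the new enum entries expand to, recall the IB_OPCODE()
helper earlier in this header: it pastes the transport and operation
names together and adds the transport's base opcode to the operation
code. A self-contained sketch follows; the macro shape and base values
are reproduced from ib_pack.h as I understand them, so treat the exact
numbers as assumptions.

/* Sketch of the IB_OPCODE() composition used by ib_pack.h. */
#define IB_OPCODE(transport, op)                            \
	IB_OPCODE_ ## transport ## _ ## op =                \
		IB_OPCODE_ ## transport + IB_OPCODE_ ## op

enum {
	IB_OPCODE_RC = 0x00,    /* transport base opcodes */
	IB_OPCODE_RD = 0x40,
	IB_OPCODE_FLUSH = 0x1C, /* per-operation code added above */
	IB_OPCODE(RC, FLUSH),   /* IB_OPCODE_RC_FLUSH = 0x1C */
	IB_OPCODE(RD, FLUSH),   /* IB_OPCODE_RD_FLUSH = 0x5C */
};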

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h

@@ -270,6 +270,9 @@ enum ib_device_cap_flags {
 	/* The device supports padding incoming writes to cacheline. */
 	IB_DEVICE_PCI_WRITE_END_PADDING =
 		IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
+	/* Placement type attributes */
+	IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL,
+	IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT,
 	IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE,
 };
@@ -987,6 +990,7 @@ enum ib_wc_opcode {
 	IB_WC_REG_MR,
 	IB_WC_MASKED_COMP_SWAP,
 	IB_WC_MASKED_FETCH_ADD,
+	IB_WC_FLUSH = IB_UVERBS_WC_FLUSH,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -1327,6 +1331,7 @@ enum ib_wr_opcode {
 		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
 	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
 		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+	IB_WR_FLUSH = IB_UVERBS_WR_FLUSH,
 	IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE,

 	/* These are kernel only and can not be issued by userspace */
@@ -1461,10 +1466,12 @@ enum ib_access_flags {
 	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
 	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
 	IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
+	IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL,
+	IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT,
 	IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
 	IB_ACCESS_SUPPORTED =
-		((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
+		((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL,
 };

 /*
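
The IB_ACCESS_SUPPORTED change works because ((x << 1) - 1) turns a
single bit into a mask of that bit and every bit below it, so moving the
anchor from IB_ACCESS_HUGETLB to IB_ACCESS_FLUSH_PERSISTENT (now the
highest fixed access bit) pulls the two new flush bits into the
supported set. A quick standalone check, using illustrative bit
positions that are assumed to mirror the uverbs layout rather than
copied from the uapi header:

#include <stdint.h>
#include <stdio.h>

/* Assumed bit positions: flush flags just above HUGETLB. */
#define ACCESS_HUGETLB          (1U << 7)
#define ACCESS_FLUSH_GLOBAL     (1U << 8)
#define ACCESS_FLUSH_PERSISTENT (1U << 9)

int main(void)
{
	/* Bumping the anchor widens the mask over the new flush bits. */
	uint32_t old_mask = (ACCESS_HUGETLB << 1) - 1;          /* 0xff  */
	uint32_t new_mask = (ACCESS_FLUSH_PERSISTENT << 1) - 1; /* 0x3ff */

	printf("old=%#x new=%#x\n", old_mask, new_mask);
	return 0;
}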
@@ -4325,6 +4332,8 @@ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
 static inline int ib_check_mr_access(struct ib_device *ib_dev,
 				     unsigned int flags)
 {
+	u64 device_cap = ib_dev->attrs.device_cap_flags;
+
 	/*
 	 * Local write permission is required if remote write or
 	 * remote atomic permission is also requested.
@@ -4339,6 +4348,13 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev,
 	if (flags & IB_ACCESS_ON_DEMAND &&
 	    !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
 		return -EOPNOTSUPP;

+	if ((flags & IB_ACCESS_FLUSH_GLOBAL &&
+	     !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) ||
+	    (flags & IB_ACCESS_FLUSH_PERSISTENT &&
+	     !(device_cap & IB_DEVICE_FLUSH_PERSISTENT)))
+		return -EOPNOTSUPP;
+
 	return 0;
 }
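
For completeness, this is roughly how a kernel ULP might exercise the
new check before registering a flushable MR. The caller is hypothetical;
only ib_check_mr_access() and the flag names from the diff above are
taken from this commit.

/* Hypothetical ULP helper: returns 0 if the device can back an MR
 * with both flush placement types, -EOPNOTSUPP otherwise. */
static int ulp_check_flush_mr_support(struct ib_device *ib_dev)
{
	unsigned int flags = IB_ACCESS_LOCAL_WRITE |
			     IB_ACCESS_REMOTE_WRITE |
			     IB_ACCESS_FLUSH_GLOBAL |
			     IB_ACCESS_FLUSH_PERSISTENT;

	/* Fails unless ib_dev->attrs.device_cap_flags advertises both
	 * IB_DEVICE_FLUSH_GLOBAL and IB_DEVICE_FLUSH_PERSISTENT. */
	return ib_check_mr_access(ib_dev, flags);
}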