From 09e0c3bbde901f17837ad5088a13c3985534b860 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:14 +0300 Subject: [PATCH 01/11] net/sched: taprio: don't access q->qdiscs[] in unoffloaded mode during attach() This is a simple code transformation with no intended behavior change, just to make it absolutely clear that q->qdiscs[] is only attached to the child taprio classes in full offload mode. Right now we use the q->qdiscs[] variable in taprio_attach() for software mode too, but that is quite confusing and avoidable. We use it only to reach the netdev TX queue, but we could as well just use netdev_get_tx_queue() for that. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8c9cfff7fd05..1cbc7fcd56c0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2145,14 +2145,20 @@ static void taprio_attach(struct Qdisc *sch) /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - struct Qdisc *qdisc = q->qdiscs[ntx]; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); struct Qdisc *old; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + struct Qdisc *qdisc = q->qdiscs[ntx]; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + old = dev_graft_qdisc(dev_queue, qdisc); } else { - old = dev_graft_qdisc(qdisc->dev_queue, sch); + /* In software mode, attach the root taprio qdisc + * to all netdev TX queues, so that dev_qdisc_enqueue() + * goes through taprio_enqueue(). + */ + old = dev_graft_qdisc(dev_queue, sch); qdisc_refcount_inc(sch); } if (old) From 25b0d4e4e41fcf0d2f43c431a82d344149e91258 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:15 +0300 Subject: [PATCH 02/11] net/sched: taprio: keep child Qdisc refcount elevated at 2 in offload mode Normally, Qdiscs have one reference on them held by their owner and one held for each TXQ to which they are attached, however this is not the case with the children of an offloaded taprio. Instead, the taprio qdisc currently lives in the following fragile equilibrium. In the software scheduling case, taprio attaches itself (the root Qdisc) to all TXQs, thus having a refcount of 1 + the number of TX queues. In this mode, the q->qdiscs[] children are not visible directly to the Qdisc API. The lifetime of the Qdiscs from this private array lasts until qdisc_destroy() -> taprio_destroy(). In the fully offloaded case, the root taprio has a refcount of 1, and all child q->qdiscs[] also have a refcount of 1. The child q->qdiscs[] are attached to the netdev TXQs directly and thus are visible to the Qdisc API, however taprio loses a reference to them very early - during qdisc_graft(parent==NULL) -> taprio_attach(). At that time, taprio frees the q->qdiscs[] array to not leak memory, but interestingly, it does not release a reference on these qdiscs because it doesn't effectively own them - they are created by taprio but owned by the Qdisc core, and will be freed by qdisc_graft(parent==NULL, new==NULL) -> qdisc_put(old) when the Qdisc is deleted or when the child Qdisc is replaced with something else. My interest is to change this equilibrium such that taprio also owns a reference on the q->qdiscs[] child Qdiscs for the lifetime of the root Qdisc, including in full offload mode. I want this because I would like taprio_leaf(), taprio_dump_class(), taprio_dump_class_stats() to have insight into q->qdiscs[] for the software scheduling mode - currently they look at dev_queue->qdisc_sleeping, which is, as mentioned, the same as the root taprio. The following set of changes is necessary: - don't free q->qdiscs[] early in taprio_attach(), free it late in taprio_destroy() for consistency with software mode. But: - currently that's not possible, because taprio doesn't own a reference on q->qdiscs[]. So hold that reference - once during the initial attach() and once during subsequent graft() calls when the child is changed. - always keep track of the current child in q->qdiscs[], even for full offload mode, so that we free in taprio_destroy() what we should, and not something stale. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 1cbc7fcd56c0..7c1fc3dc3e55 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2146,30 +2146,31 @@ static void taprio_attach(struct Qdisc *sch) /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); - struct Qdisc *old; + struct Qdisc *old, *dev_queue_qdisc; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { struct Qdisc *qdisc = q->qdiscs[ntx]; + /* In offload mode, the root taprio qdisc is bypassed + * and the netdev TX queues see the children directly + */ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - old = dev_graft_qdisc(dev_queue, qdisc); + dev_queue_qdisc = qdisc; } else { /* In software mode, attach the root taprio qdisc * to all netdev TX queues, so that dev_qdisc_enqueue() * goes through taprio_enqueue(). */ - old = dev_graft_qdisc(dev_queue, sch); - qdisc_refcount_inc(sch); + dev_queue_qdisc = sch; } + old = dev_graft_qdisc(dev_queue, dev_queue_qdisc); + /* The qdisc's refcount requires to be elevated once + * for each netdev TX queue it is grafted onto + */ + qdisc_refcount_inc(dev_queue_qdisc); if (old) qdisc_put(old); } - - /* access to the child qdiscs is not needed in offload mode */ - if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - kfree(q->qdiscs); - q->qdiscs = NULL; - } } static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, @@ -2198,13 +2199,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, if (dev->flags & IFF_UP) dev_deactivate(dev); + /* In offload mode, the child Qdisc is directly attached to the netdev + * TX queue, and thus, we need to keep its refcount elevated in order + * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue. + * However, save the reference to the new qdisc in the private array in + * both software and offload cases, to have an up-to-date reference to + * our children. + */ + *old = q->qdiscs[cl - 1]; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - *old = dev_graft_qdisc(dev_queue, new); - } else { - *old = q->qdiscs[cl - 1]; - q->qdiscs[cl - 1] = new; + WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old); + if (new) + qdisc_refcount_inc(new); + if (*old) + qdisc_put(*old); } + q->qdiscs[cl - 1] = new; if (new) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; From 98766add2d55194be9f7d88628a058ceab3b06e6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:16 +0300 Subject: [PATCH 03/11] net/sched: taprio: try again to report q->qdiscs[] to qdisc_leaf() This is another stab at commit 1461d212ab27 ("net/sched: taprio: make qdisc_leaf() see the per-netdev-queue pfifo child qdiscs"), later reverted in commit af7b29b1deaa ("Revert "net/sched: taprio: make qdisc_leaf() see the per-netdev-queue pfifo child qdiscs""). I believe that the problems that caused the revert were fixed, and thus, this change is identical to the original patch. Its purpose is to properly reject attaching a software taprio child qdisc to a software taprio parent. Because unoffloaded taprio currently reports itself (the root Qdisc) as the return value from qdisc_leaf(), then the process of attaching another taprio as child to a Qdisc class of the root will just result in a Qdisc_ops :: change() call for the root. Whereas that's not we want. We want Qdisc_ops :: init() to be called for the taprio child, in order to give the taprio child a chance to check whether its sch->parent is TC_H_ROOT or not (and reject this configuration). Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-4-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 7c1fc3dc3e55..831e6f3fda2a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2453,12 +2453,14 @@ start_error: static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx = cl - 1; - if (!dev_queue) + if (ntx >= dev->num_tx_queues) return NULL; - return rtnl_dereference(dev_queue->qdisc_sleeping); + return q->qdiscs[ntx]; } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) From 6e0ec800c1740372a6bbada0d98e78c2e69ba4a8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:17 +0300 Subject: [PATCH 04/11] net/sched: taprio: delete misleading comment about preallocating child qdiscs As mentioned in commit af7b29b1deaa ("Revert "net/sched: taprio: make qdisc_leaf() see the per-netdev-queue pfifo child qdiscs"") - unlike mqprio, taprio doesn't use q->qdiscs[] only as a temporary transport between Qdisc_ops :: init() and Qdisc_ops :: attach(). Delete the comment, which is just stolen from mqprio, but there, the usage patterns are a lot different, and this is nothing but confusing. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-5-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 831e6f3fda2a..d5693bd52e93 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2099,11 +2099,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, return -EOPNOTSUPP; } - /* pre-allocate qdisc, attachment can't fail */ - q->qdiscs = kcalloc(dev->num_tx_queues, - sizeof(q->qdiscs[0]), + q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]), GFP_KERNEL); - if (!q->qdiscs) return -ENOMEM; From 665338b2a7a0139337d1f85be65ed16e487f84c1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:18 +0300 Subject: [PATCH 05/11] net/sched: taprio: dump class stats for the actual q->qdiscs[] This makes a difference for the software scheduling mode, where dev_queue->qdisc_sleeping is the same as the taprio root Qdisc itself, but when we're talking about what Qdisc and stats get reported for a traffic class, the root taprio isn't what comes to mind, but q->qdiscs[] is. To understand the difference, I've attempted to send 100 packets in software mode through class 8001:5, and recorded the stats before and after the change. Here is before: $ tc -s class show dev eth0 class taprio 8001:1 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:2 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:3 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:4 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:5 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:6 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:7 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:8 root leaf 8001: Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 and here is after: class taprio 8001:1 root Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:2 root Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:3 root Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:4 root Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:5 root Sent 9400 bytes 100 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:6 root Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:7 root Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 class taprio 8001:8 root leaf 800d: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 window_drops 0 The most glaring (and expected) difference is that before, all class stats reported the global stats, whereas now, they really report just the counters for that traffic class. Finally, Pedro Tammela points out that there is a tc selftest which checks specifically which handle do the child Qdiscs corresponding to each class have. That's changing here - taprio no longer reports tcm->tcm_info as the same handle "1:" as itself (the root Qdisc), but 0 (the handle of the default pfifo child Qdiscs). Since iproute2 does not print a child Qdisc handle of 0, adjust the test's expected output. Link: https://lore.kernel.org/netdev/3b83fcf6-a5e8-26fb-8c8a-ec34ec4c3342@mojatatu.com/ Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-6-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 8 +++----- .../selftests/tc-testing/tc-tests/qdiscs/taprio.json | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index d5693bd52e93..1cb5e41c0ec7 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2472,11 +2472,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid) static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct Qdisc *child = taprio_leaf(sch, cl); tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; + tcm->tcm_info = child->handle; return 0; } @@ -2486,16 +2486,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct Qdisc *child = taprio_leaf(sch, cl); struct tc_taprio_qopt_offload offload = { .cmd = TAPRIO_CMD_QUEUE_STATS, .queue_stats = { .queue = cl - 1, }, }; - struct Qdisc *child; - child = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 || qdisc_qstats_copy(d, child) < 0) return -1; diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 08d4861c2e78..860b55df0d6d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -104,7 +104,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI", "expExitCode": "0", "verifyCmd": "$TC class show dev $ETH", - "matchPattern": "class taprio 1:[0-9]+ root leaf 1:", + "matchPattern": "class taprio 1:[0-9]+ root", "matchCount": "8", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" From 40b0425f8ba17c32cf7182975032a3999c364dfc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:19 +0300 Subject: [PATCH 06/11] net: ptp: create a mock-up PTP Hardware Clock driver There are several cases where virtual net devices may benefit from having a PTP clock, and these have to do with testing. I can see at least netdevsim and veth as potential users of a common mock-up PTP hardware clock driver. The proposed idea is to create an object which emulates PTP clock operations on top of the unadjustable CLOCK_MONOTONIC_RAW plus a software-controlled time domain via a timecounter/cyclecounter and then link that PHC to the netdevsim device. The driver is fully functional for its intended purpose, and it successfully passes the PTP selftests. $ cd tools/testing/selftests/ptp/ $ ./phc.sh /dev/ptp2 TEST: settime [ OK ] TEST: adjtime [ OK ] TEST: adjfreq [ OK ] Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-7-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 7 ++ drivers/ptp/Kconfig | 11 +++ drivers/ptp/Makefile | 1 + drivers/ptp/ptp_mock.c | 175 +++++++++++++++++++++++++++++++++++++++ include/linux/ptp_mock.h | 38 +++++++++ 5 files changed, 232 insertions(+) create mode 100644 drivers/ptp/ptp_mock.c create mode 100644 include/linux/ptp_mock.h diff --git a/MAINTAINERS b/MAINTAINERS index 6efcd8713682..49f032556cf1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17170,6 +17170,13 @@ F: drivers/ptp/* F: include/linux/ptp_cl* K: (?:\b|_)ptp(?:\b|_) +PTP MOCKUP CLOCK SUPPORT +M: Vladimir Oltean +L: netdev@vger.kernel.org +S: Maintained +F: drivers/ptp/ptp_mock.c +F: include/linux/ptp_mock.h + PTP VIRTUAL CLOCK SUPPORT M: Yangbo Lu L: netdev@vger.kernel.org diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 32dff1b4f891..ed9d97a032f1 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -155,6 +155,17 @@ config PTP_1588_CLOCK_IDTCM To compile this driver as a module, choose M here: the module will be called ptp_clockmatrix. +config PTP_1588_CLOCK_MOCK + tristate "Mock-up PTP clock" + depends on PTP_1588_CLOCK + help + This driver offers a set of PTP clock manipulation operations over + the system monotonic time. It can be used by virtual network device + drivers to emulate PTP capabilities. + + To compile this driver as a module, choose M here: the module + will be called ptp_mock. + config PTP_1588_CLOCK_VMW tristate "VMware virtual PTP clock" depends on ACPI && HYPERVISOR_GUEST && X86 diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 553f18bf3c83..dea0cebd2303 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -16,6 +16,7 @@ ptp-qoriq-y += ptp_qoriq.o ptp-qoriq-$(CONFIG_DEBUG_FS) += ptp_qoriq_debugfs.o obj-$(CONFIG_PTP_1588_CLOCK_IDTCM) += ptp_clockmatrix.o obj-$(CONFIG_PTP_1588_CLOCK_IDT82P33) += ptp_idt82p33.o +obj-$(CONFIG_PTP_1588_CLOCK_MOCK) += ptp_mock.o obj-$(CONFIG_PTP_1588_CLOCK_VMW) += ptp_vmw.o obj-$(CONFIG_PTP_1588_CLOCK_OCP) += ptp_ocp.o obj-$(CONFIG_PTP_DFL_TOD) += ptp_dfl_tod.o diff --git a/drivers/ptp/ptp_mock.c b/drivers/ptp/ptp_mock.c new file mode 100644 index 000000000000..e7b459c846a2 --- /dev/null +++ b/drivers/ptp/ptp_mock.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2023 NXP + * + * Mock-up PTP Hardware Clock driver for virtual network devices + * + * Create a PTP clock which offers PTP time manipulation operations + * using a timecounter/cyclecounter on top of CLOCK_MONOTONIC_RAW. + */ + +#include +#include +#include + +/* Clamp scaled_ppm between -2,097,152,000 and 2,097,152,000, + * and thus "adj" between -68,719,476 and 68,719,476 + */ +#define MOCK_PHC_MAX_ADJ_PPB 32000000 +/* Timestamps from ktime_get_raw() have 1 ns resolution, so the scale factor + * (MULT >> SHIFT) needs to be 1. Pick SHIFT as 31 bits, which translates + * MULT(freq 0) into 0x80000000. + */ +#define MOCK_PHC_CC_SHIFT 31 +#define MOCK_PHC_CC_MULT (1 << MOCK_PHC_CC_SHIFT) +#define MOCK_PHC_FADJ_SHIFT 9 +#define MOCK_PHC_FADJ_DENOMINATOR 15625ULL + +/* The largest cycle_delta that timecounter_read_delta() can handle without a + * 64-bit overflow during the multiplication with cc->mult, given the max "adj" + * we permit, is ~8.3 seconds. Make sure readouts are more frequent than that. + */ +#define MOCK_PHC_REFRESH_INTERVAL (HZ * 5) + +#define info_to_phc(d) container_of((d), struct mock_phc, info) + +struct mock_phc { + struct ptp_clock_info info; + struct ptp_clock *clock; + struct timecounter tc; + struct cyclecounter cc; + spinlock_t lock; +}; + +static u64 mock_phc_cc_read(const struct cyclecounter *cc) +{ + return ktime_get_raw_ns(); +} + +static int mock_phc_adjfine(struct ptp_clock_info *info, long scaled_ppm) +{ + struct mock_phc *phc = info_to_phc(info); + s64 adj; + + adj = (s64)scaled_ppm << MOCK_PHC_FADJ_SHIFT; + adj = div_s64(adj, MOCK_PHC_FADJ_DENOMINATOR); + + spin_lock(&phc->lock); + timecounter_read(&phc->tc); + phc->cc.mult = MOCK_PHC_CC_MULT + adj; + spin_unlock(&phc->lock); + + return 0; +} + +static int mock_phc_adjtime(struct ptp_clock_info *info, s64 delta) +{ + struct mock_phc *phc = info_to_phc(info); + + spin_lock(&phc->lock); + timecounter_adjtime(&phc->tc, delta); + spin_unlock(&phc->lock); + + return 0; +} + +static int mock_phc_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + struct mock_phc *phc = info_to_phc(info); + u64 ns = timespec64_to_ns(ts); + + spin_lock(&phc->lock); + timecounter_init(&phc->tc, &phc->cc, ns); + spin_unlock(&phc->lock); + + return 0; +} + +static int mock_phc_gettime64(struct ptp_clock_info *info, struct timespec64 *ts) +{ + struct mock_phc *phc = info_to_phc(info); + u64 ns; + + spin_lock(&phc->lock); + ns = timecounter_read(&phc->tc); + spin_unlock(&phc->lock); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static long mock_phc_refresh(struct ptp_clock_info *info) +{ + struct timespec64 ts; + + mock_phc_gettime64(info, &ts); + + return MOCK_PHC_REFRESH_INTERVAL; +} + +int mock_phc_index(struct mock_phc *phc) +{ + return ptp_clock_index(phc->clock); +} +EXPORT_SYMBOL_GPL(mock_phc_index); + +struct mock_phc *mock_phc_create(struct device *dev) +{ + struct mock_phc *phc; + int err; + + phc = kzalloc(sizeof(*phc), GFP_KERNEL); + if (!phc) { + err = -ENOMEM; + goto out; + } + + phc->info = (struct ptp_clock_info) { + .owner = THIS_MODULE, + .name = "Mock-up PTP clock", + .max_adj = MOCK_PHC_MAX_ADJ_PPB, + .adjfine = mock_phc_adjfine, + .adjtime = mock_phc_adjtime, + .gettime64 = mock_phc_gettime64, + .settime64 = mock_phc_settime64, + .do_aux_work = mock_phc_refresh, + }; + + phc->cc = (struct cyclecounter) { + .read = mock_phc_cc_read, + .mask = CYCLECOUNTER_MASK(64), + .mult = MOCK_PHC_CC_MULT, + .shift = MOCK_PHC_CC_SHIFT, + }; + + spin_lock_init(&phc->lock); + timecounter_init(&phc->tc, &phc->cc, 0); + + phc->clock = ptp_clock_register(&phc->info, dev); + if (IS_ERR(phc->clock)) { + err = PTR_ERR(phc->clock); + goto out_free_phc; + } + + ptp_schedule_worker(phc->clock, MOCK_PHC_REFRESH_INTERVAL); + + return phc; + +out_free_phc: + kfree(phc); +out: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(mock_phc_create); + +void mock_phc_destroy(struct mock_phc *phc) +{ + ptp_clock_unregister(phc->clock); + kfree(phc); +} +EXPORT_SYMBOL_GPL(mock_phc_destroy); + +MODULE_DESCRIPTION("Mock-up PTP Hardware Clock driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/ptp_mock.h b/include/linux/ptp_mock.h new file mode 100644 index 000000000000..72eb401034d9 --- /dev/null +++ b/include/linux/ptp_mock.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Mock-up PTP Hardware Clock driver for virtual network devices + * + * Copyright 2023 NXP + */ + +#ifndef _PTP_MOCK_H_ +#define _PTP_MOCK_H_ + +struct device; +struct mock_phc; + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK_MOCK) + +struct mock_phc *mock_phc_create(struct device *dev); +void mock_phc_destroy(struct mock_phc *phc); +int mock_phc_index(struct mock_phc *phc); + +#else + +static inline struct mock_phc *mock_phc_create(struct device *dev) +{ + return NULL; +} + +static inline void mock_phc_destroy(struct mock_phc *phc) +{ +} + +static inline int mock_phc_index(struct mock_phc *phc) +{ + return -1; +} + +#endif + +#endif /* _PTP_MOCK_H_ */ From b63e78fca889e07931ec8f259701718a24e5052e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:20 +0300 Subject: [PATCH 07/11] net: netdevsim: use mock PHC driver I'd like to make netdevsim offload tc-taprio, but currently, this Qdisc emits a ETHTOOL_GET_TS_INFO call to the driver to make sure that it has a PTP clock, so that it is reasonably capable of offloading the schedule. By using the mock PHC driver, that becomes possible. Hardware timestamping is not necessary, and netdevsim does not support packet I/O anyway. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-8-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/Kconfig | 1 + drivers/net/netdevsim/ethtool.c | 11 +++++++++++ drivers/net/netdevsim/netdev.c | 11 ++++++++++- drivers/net/netdevsim/netdevsim.h | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 368c6f5b327e..4953c1494723 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -592,6 +592,7 @@ config NETDEVSIM depends on INET depends on IPV6 || IPV6=n depends on PSAMPLE || PSAMPLE=n + depends on PTP_1588_CLOCK_MOCK || PTP_1588_CLOCK_MOCK=n select NET_DEVLINK help This driver is a developer testing tool and software model that can diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index ffd9f84b6644..bd546d4d26c6 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -140,6 +140,16 @@ nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) return 0; } +static int nsim_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct netdevsim *ns = netdev_priv(dev); + + info->phc_index = mock_phc_index(ns->phc); + + return 0; +} + static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, .get_pause_stats = nsim_get_pause_stats, @@ -153,6 +163,7 @@ static const struct ethtool_ops nsim_ethtool_ops = { .set_channels = nsim_set_channels, .get_fecparam = nsim_get_fecparam, .set_fecparam = nsim_set_fecparam, + .get_ts_info = nsim_get_ts_info, }; static void nsim_ethtool_ring_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0c8daeb0d62b..2a4a0c4065cf 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -291,13 +291,19 @@ static void nsim_setup(struct net_device *dev) static int nsim_init_netdevsim(struct netdevsim *ns) { + struct mock_phc *phc; int err; + phc = mock_phc_create(&ns->nsim_bus_dev->dev); + if (IS_ERR(phc)) + return PTR_ERR(phc); + + ns->phc = phc; ns->netdev->netdev_ops = &nsim_netdev_ops; err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) - return err; + goto err_phc_destroy; rtnl_lock(); err = nsim_bpf_init(ns); @@ -320,6 +326,8 @@ err_ipsec_teardown: err_utn_destroy: rtnl_unlock(); nsim_udp_tunnels_info_destroy(ns->netdev); +err_phc_destroy: + mock_phc_destroy(ns->phc); return err; } @@ -383,6 +391,7 @@ void nsim_destroy(struct netdevsim *ns) rtnl_unlock(); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) nsim_udp_tunnels_info_destroy(dev); + mock_phc_destroy(ns->phc); free_netdev(dev); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 7be98b7dcca9..028c825b86db 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +94,7 @@ struct netdevsim { struct net_device *netdev; struct nsim_dev *nsim_dev; struct nsim_dev_port *nsim_dev_port; + struct mock_phc *phc; u64 tx_packets; u64 tx_bytes; From 35da47fe1c4766451def03a1b4f59c6b13a9373c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:21 +0300 Subject: [PATCH 08/11] net: netdevsim: mimic tc-taprio offload To be able to use netdevsim for tc-testing with an offloaded tc-taprio schedule, it needs to report a PTP clock (which it now does), and to accept ndo_setup_tc(TC_SETUP_QDISC_TAPRIO) calls. Since netdevsim has no packet I/O, this doesn't do anything intelligent, it only allows taprio offload code paths to go through some level of automated testing. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-9-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 2a4a0c4065cf..2eac92f49631 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -209,6 +209,31 @@ static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state) return 0; } +static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats) +{ + stats->window_drops = 0; + stats->tx_overruns = 0; +} + +static int nsim_setup_tc_taprio(struct net_device *dev, + struct tc_taprio_qopt_offload *offload) +{ + int err = 0; + + switch (offload->cmd) { + case TAPRIO_CMD_REPLACE: + case TAPRIO_CMD_DESTROY: + break; + case TAPRIO_CMD_STATS: + nsim_taprio_stats(&offload->stats); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + static LIST_HEAD(nsim_block_cb_list); static int @@ -217,6 +242,8 @@ nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) struct netdevsim *ns = netdev_priv(dev); switch (type) { + case TC_SETUP_QDISC_TAPRIO: + return nsim_setup_tc_taprio(dev, type_data); case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, &nsim_block_cb_list, From 355adce3010b9e91a29fc675520fee919639d6ee Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:22 +0300 Subject: [PATCH 09/11] selftests/tc-testing: add ptp_mock Kconfig dependency For offloaded tc-taprio testing with netdevsim, the mock-up PHC driver is used. Suggested-by: Victor Nogueira Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230807193324.4128292-10-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 71706197ba0f..5aa8705751f0 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -96,10 +96,11 @@ CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NETDEVSIM=m # ## Network testing # CONFIG_CAN=m CONFIG_ATM=y +CONFIG_NETDEVSIM=m +CONFIG_PTP_1588_CLOCK_MOCK=m From 1890cf08bd9992558bfbe710a008a12209389c9b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:23 +0300 Subject: [PATCH 10/11] selftests/tc-testing: test that taprio can only be attached as root Check that the "Can only be attached as root qdisc" error message from taprio is effective by attempting to attach it to a class of another taprio qdisc. That operation should fail. In the bug that was squashed by change "net/sched: taprio: try again to report q->qdiscs[] to qdisc_leaf()", grafting a child taprio to a root software taprio would be misinterpreted as a change() to the root taprio. Catch this by looking at whether the base-time of the root taprio has changed to follow the base-time of the child taprio, something which should have absolutely never happened assuming correct semantics. Vinicius points out that looking at "base_time" in the tc qdisc show output is unreliable because user space is in a race with the kernel applying the setting. So we create a helper bash script which waits while there is any pending schedule. Link: https://lore.kernel.org/netdev/87il9w0xx7.fsf@intel.com/ Signed-off-by: Vladimir Oltean Reviewed-by: Pedro Tammela Link: https://lore.kernel.org/r/20230807193324.4128292-11-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../tc-testing/taprio_wait_for_admin.sh | 16 ++++++ .../tc-testing/tc-tests/qdiscs/taprio.json | 50 +++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh diff --git a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh b/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh new file mode 100755 index 000000000000..f5335e8ad6b4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +TC="$1"; shift +ETH="$1"; shift + +# The taprio architecture changes the admin schedule from a hrtimer and not +# from process context, so we need to wait in order to make sure that any +# schedule change actually took place. +while :; do + has_admin="$($TC -j qdisc show dev $ETH root | jq '.[].options | has("admin")')" + if [ "$has_admin" = "false" ]; then + break; + fi + + sleep 1 +done diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 860b55df0d6d..f475967c509f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -156,5 +156,55 @@ "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" ] + }, + { + "id": "39b4", + "name": "Reject grafting taprio as child qdisc of software taprio", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", + "./taprio_wait_for_admin.sh $TC $ETH" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 clockid CLOCK_TAI", + "expExitCode": "2", + "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", + "matchPattern": "0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "e8a1", + "name": "Reject grafting taprio as child qdisc of offloaded taprio", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2", + "./taprio_wait_for_admin.sh $TC $ETH" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 flags 0x2", + "expExitCode": "2", + "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", + "matchPattern": "0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ] From 29c298d2bc82ccdbd23bf08bbafeb4e874832946 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Aug 2023 22:33:24 +0300 Subject: [PATCH 11/11] selftests/tc-testing: verify that a qdisc can be grafted onto a taprio class The reason behind commit af7b29b1deaa ("Revert "net/sched: taprio: make qdisc_leaf() see the per-netdev-queue pfifo child qdiscs"") was that the patch it reverted caused a crash when attaching a CBS shaper to one of the taprio classes. Prevent that from happening again by adding a test case for it, which now passes correctly in both offload and software modes. Signed-off-by: Vladimir Oltean Reviewed-by: Pedro Tammela Link: https://lore.kernel.org/r/20230807193324.4128292-12-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/taprio.json | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index f475967c509f..0599635c4bc6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -206,5 +206,55 @@ "$TC qdisc del dev $ETH root", "echo \"1\" > /sys/bus/netdevsim/del_device" ] + }, + { + "id": "a7bf", + "name": "Graft cbs as child of software taprio", + "category": [ + "qdisc", + "taprio", + "cbs" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH handle 8002: parent 8001:8 cbs idleslope 20000 sendslope -980000 hicredit 30 locredit -1470", + "expExitCode": "0", + "verifyCmd": "$TC -d qdisc show dev $ETH", + "matchPattern": "qdisc cbs 8002: parent 8001:8 hicredit 30 locredit -1470 sendslope -980000 idleslope 20000 offload 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "6a83", + "name": "Graft cbs as child of offloaded taprio", + "category": [ + "qdisc", + "taprio", + "cbs" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH handle 8002: parent 8001:8 cbs idleslope 20000 sendslope -980000 hicredit 30 locredit -1470", + "expExitCode": "0", + "verifyCmd": "$TC -d qdisc show dev $ETH", + "matchPattern": "qdisc cbs 8002: parent 8001:8 refcnt 2 hicredit 30 locredit -1470 sendslope -980000 idleslope 20000 offload 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ]