2019-12-27 22:55:18 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
|
2019-12-27 22:55:23 +08:00
|
|
|
#include <net/sock.h>
|
2019-12-27 22:55:18 +08:00
|
|
|
#include <linux/ethtool_netlink.h>
|
2021-08-01 18:41:31 +08:00
|
|
|
#include <linux/pm_runtime.h>
|
2019-12-27 22:55:18 +08:00
|
|
|
#include "netlink.h"
|
|
|
|
|
2019-12-27 22:55:23 +08:00
|
|
|
static struct genl_family ethtool_genl_family;
|
|
|
|
|
2019-12-27 22:55:33 +08:00
|
|
|
static bool ethnl_ok __read_mostly;
|
2019-12-27 22:55:58 +08:00
|
|
|
static u32 ethnl_bcast_seq;
|
2019-12-27 22:55:33 +08:00
|
|
|
|
2020-10-06 06:07:39 +08:00
|
|
|
/* Request header flags allowed by ethnl_header_policy */
#define ETHTOOL_FLAGS_BASIC \
	(ETHTOOL_FLAG_COMPACT_BITSETS | ETHTOOL_FLAG_OMIT_REPLY)
/* Request header flags allowed by ethnl_header_policy_stats */
#define ETHTOOL_FLAGS_STATS \
	(ETHTOOL_FLAGS_BASIC | ETHTOOL_FLAG_STATS)
|
|
|
|
|
2020-10-06 06:07:36 +08:00
|
|
|
const struct nla_policy ethnl_header_policy[] = {
|
2019-12-27 22:55:23 +08:00
|
|
|
[ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
|
|
|
|
[ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
|
|
|
|
.len = ALTIFNAMSIZ - 1 },
|
2020-10-06 06:07:39 +08:00
|
|
|
[ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
|
|
|
|
ETHTOOL_FLAGS_BASIC),
|
|
|
|
};
|
|
|
|
|
|
|
|
const struct nla_policy ethnl_header_policy_stats[] = {
|
|
|
|
[ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
|
|
|
|
[ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
|
|
|
|
.len = ALTIFNAMSIZ - 1 },
|
|
|
|
[ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
|
|
|
|
ETHTOOL_FLAGS_STATS),
|
2019-12-27 22:55:23 +08:00
|
|
|
};
|
|
|
|
|
2021-08-01 18:37:39 +08:00
|
|
|
int ethnl_ops_begin(struct net_device *dev)
|
|
|
|
{
|
2021-08-01 18:41:31 +08:00
|
|
|
int ret;
|
|
|
|
|
2021-08-01 18:40:05 +08:00
|
|
|
if (!dev)
|
2021-08-06 03:08:22 +08:00
|
|
|
return -ENODEV;
|
2021-08-01 18:40:05 +08:00
|
|
|
|
2021-08-01 18:41:31 +08:00
|
|
|
if (dev->dev.parent)
|
|
|
|
pm_runtime_get_sync(dev->dev.parent);
|
2021-08-01 18:40:05 +08:00
|
|
|
|
2021-12-03 18:13:18 +08:00
|
|
|
if (!netif_device_present(dev) ||
|
|
|
|
dev->reg_state == NETREG_UNREGISTERING) {
|
2021-08-01 18:41:31 +08:00
|
|
|
ret = -ENODEV;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dev->ethtool_ops->begin) {
|
|
|
|
ret = dev->ethtool_ops->begin(dev);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
err:
|
|
|
|
if (dev->dev.parent)
|
|
|
|
pm_runtime_put(dev->dev.parent);
|
|
|
|
|
|
|
|
return ret;
|
2021-08-01 18:37:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void ethnl_ops_complete(struct net_device *dev)
|
|
|
|
{
|
2021-08-06 03:08:22 +08:00
|
|
|
if (dev->ethtool_ops->complete)
|
2021-08-01 18:37:39 +08:00
|
|
|
dev->ethtool_ops->complete(dev);
|
2021-08-01 18:41:31 +08:00
|
|
|
|
|
|
|
if (dev->dev.parent)
|
|
|
|
pm_runtime_put(dev->dev.parent);
|
2021-08-01 18:37:39 +08:00
|
|
|
}
|
|
|
|
|
2019-12-27 22:55:23 +08:00
|
|
|
/**
|
2020-03-13 04:07:38 +08:00
|
|
|
* ethnl_parse_header_dev_get() - parse request header
|
2019-12-27 22:55:23 +08:00
|
|
|
* @req_info: structure to put results into
|
|
|
|
* @header: nest attribute with request header
|
|
|
|
* @net: request netns
|
|
|
|
* @extack: netlink extack for error reporting
|
|
|
|
* @require_dev: fail if no device identified in header
|
|
|
|
*
|
|
|
|
* Parse request header in nested attribute @nest and puts results into
|
|
|
|
* the structure pointed to by @req_info. Extack from @info is used for error
|
|
|
|
* reporting. If req_info->dev is not null on return, reference to it has
|
|
|
|
* been taken. If error is returned, *req_info is null initialized and no
|
|
|
|
* reference is held.
|
|
|
|
*
|
|
|
|
* Return: 0 on success or negative error code
|
|
|
|
*/
|
2020-03-13 04:07:38 +08:00
|
|
|
int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
|
|
|
|
const struct nlattr *header, struct net *net,
|
|
|
|
struct netlink_ext_ack *extack, bool require_dev)
|
2019-12-27 22:55:23 +08:00
|
|
|
{
|
2020-10-06 06:07:35 +08:00
|
|
|
struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
|
2019-12-27 22:55:23 +08:00
|
|
|
const struct nlattr *devname_attr;
|
|
|
|
struct net_device *dev = NULL;
|
2020-03-16 01:17:53 +08:00
|
|
|
u32 flags = 0;
|
2019-12-27 22:55:23 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!header) {
|
|
|
|
NL_SET_ERR_MSG(extack, "request header missing");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2020-10-06 06:07:39 +08:00
|
|
|
/* No validation here, command policy should have a nested policy set
|
|
|
|
* for the header, therefore validation should have already been done.
|
|
|
|
*/
|
2020-10-06 06:07:35 +08:00
|
|
|
ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
|
2020-10-06 06:07:39 +08:00
|
|
|
NULL, extack);
|
2019-12-27 22:55:23 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2020-10-06 06:07:39 +08:00
|
|
|
if (tb[ETHTOOL_A_HEADER_FLAGS])
|
2020-03-16 01:17:53 +08:00
|
|
|
flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
|
2019-12-27 22:55:23 +08:00
|
|
|
|
2020-03-16 01:17:53 +08:00
|
|
|
devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
|
2019-12-27 22:55:23 +08:00
|
|
|
if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) {
|
|
|
|
u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]);
|
|
|
|
|
|
|
|
dev = dev_get_by_index(net, ifindex);
|
|
|
|
if (!dev) {
|
|
|
|
NL_SET_ERR_MSG_ATTR(extack,
|
|
|
|
tb[ETHTOOL_A_HEADER_DEV_INDEX],
|
|
|
|
"no device matches ifindex");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
/* if both ifindex and ifname are passed, they must match */
|
|
|
|
if (devname_attr &&
|
|
|
|
strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) {
|
|
|
|
dev_put(dev);
|
|
|
|
NL_SET_ERR_MSG_ATTR(extack, header,
|
|
|
|
"ifindex and name do not match");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
} else if (devname_attr) {
|
|
|
|
dev = dev_get_by_name(net, nla_data(devname_attr));
|
|
|
|
if (!dev) {
|
|
|
|
NL_SET_ERR_MSG_ATTR(extack, devname_attr,
|
|
|
|
"no device matches name");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
} else if (require_dev) {
|
|
|
|
NL_SET_ERR_MSG_ATTR(extack, header,
|
|
|
|
"neither ifindex nor name specified");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-12-14 23:47:25 +08:00
|
|
|
req_info->dev = dev;
|
|
|
|
if (dev)
|
2021-12-14 09:39:02 +08:00
|
|
|
netdev_tracker_alloc(dev, &req_info->dev_tracker, GFP_KERNEL);
|
2020-03-16 01:17:53 +08:00
|
|
|
req_info->flags = flags;
|
2019-12-27 22:55:23 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ethnl_fill_reply_header() - Put common header into a reply message
|
|
|
|
* @skb: skb with the message
|
|
|
|
* @dev: network device to describe in header
|
|
|
|
* @attrtype: attribute type to use for the nest
|
|
|
|
*
|
|
|
|
* Create a nested attribute with attributes describing given network device.
|
|
|
|
*
|
|
|
|
* Return: 0 on success, error value (-EMSGSIZE only) on error
|
|
|
|
*/
|
|
|
|
int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
|
|
|
|
u16 attrtype)
|
|
|
|
{
|
|
|
|
struct nlattr *nest;
|
|
|
|
|
|
|
|
if (!dev)
|
|
|
|
return 0;
|
|
|
|
nest = nla_nest_start(skb, attrtype);
|
|
|
|
if (!nest)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
if (nla_put_u32(skb, ETHTOOL_A_HEADER_DEV_INDEX, (u32)dev->ifindex) ||
|
|
|
|
nla_put_string(skb, ETHTOOL_A_HEADER_DEV_NAME, dev->name))
|
|
|
|
goto nla_put_failure;
|
|
|
|
/* If more attributes are put into reply header, ethnl_header_size()
|
|
|
|
* must be updated to account for them.
|
|
|
|
*/
|
|
|
|
|
|
|
|
nla_nest_end(skb, nest);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nla_put_failure:
|
|
|
|
nla_nest_cancel(skb, nest);
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ethnl_reply_init() - Create skb for a reply and fill device identification
|
2020-01-27 06:11:01 +08:00
|
|
|
* @payload: payload length (without netlink and genetlink header)
|
|
|
|
* @dev: device the reply is about (may be null)
|
|
|
|
* @cmd: ETHTOOL_MSG_* message type for reply
|
|
|
|
* @hdr_attrtype: attribute type for common header
|
|
|
|
* @info: genetlink info of the received packet we respond to
|
|
|
|
* @ehdrp: place to store payload pointer returned by genlmsg_new()
|
2019-12-27 22:55:23 +08:00
|
|
|
*
|
|
|
|
* Return: pointer to allocated skb on success, NULL on error
|
|
|
|
*/
|
|
|
|
struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
|
|
|
|
u16 hdr_attrtype, struct genl_info *info,
|
|
|
|
void **ehdrp)
|
|
|
|
{
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
|
|
|
skb = genlmsg_new(payload, GFP_KERNEL);
|
|
|
|
if (!skb)
|
|
|
|
goto err;
|
|
|
|
*ehdrp = genlmsg_put_reply(skb, info, ðtool_genl_family, 0, cmd);
|
|
|
|
if (!*ehdrp)
|
|
|
|
goto err_free;
|
|
|
|
|
|
|
|
if (dev) {
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = ethnl_fill_reply_header(skb, dev, hdr_attrtype);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
return skb;
|
|
|
|
|
|
|
|
err_free:
|
|
|
|
nlmsg_free(skb);
|
|
|
|
err:
|
|
|
|
if (info)
|
|
|
|
GENL_SET_ERR_MSG(info, "failed to setup reply message");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-07-10 08:42:47 +08:00
|
|
|
/**
 * ethnl_dump_put() - Put genetlink header of a dump message
 * @skb: skb to put the header into
 * @cb:  netlink callback of the dump being processed
 * @cmd: ETHTOOL_MSG_* message type
 *
 * The header uses the port id and the sequence number of the request which
 * started the dump, both taken from @cb.
 *
 * Return: value returned by genlmsg_put()
 */
void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd)
{
	return genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			   &ethtool_genl_family, 0, cmd);
}
|
|
|
|
|
2020-05-11 03:12:35 +08:00
|
|
|
/**
 * ethnl_bcastmsg_put() - Put genetlink header of a broadcast message
 * @skb: skb to put the header into
 * @cmd: ETHTOOL_MSG_* message type
 *
 * The header uses port id 0 and the next value of the file-wide
 * ethnl_bcast_seq counter as the sequence number.
 *
 * Return: value returned by genlmsg_put()
 */
void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
{
	return genlmsg_put(skb, 0, ++ethnl_bcast_seq, &ethtool_genl_family, 0,
			   cmd);
}
|
|
|
|
|
2020-05-11 03:12:35 +08:00
|
|
|
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
|
2019-12-27 22:55:58 +08:00
|
|
|
{
|
|
|
|
return genlmsg_multicast_netns(ðtool_genl_family, dev_net(dev), skb,
|
|
|
|
0, ETHNL_MCGRP_MONITOR, GFP_KERNEL);
|
|
|
|
}
|
|
|
|
|
2019-12-27 22:55:38 +08:00
|
|
|
/* GET request helpers */
|
|
|
|
|
|
|
|
/**
 * struct ethnl_dump_ctx - context structure for generic dumpit() callback
 * @ops:        request ops of currently processed message type
 * @req_info:   parsed request header of processed request
 * @reply_data: data needed to compose the reply
 * @pos_hash:   saved iteration position - hashbucket
 * @pos_idx:    saved iteration position - index
 *
 * The context lives in the ctx area of struct netlink_callback (see
 * ethnl_dump_context()) and is therefore preserved between iterations of
 * a dump. It is initialized by ethnl_default_start() and used in
 * ethnl_default_dumpit() and ethnl_default_done().
 */
struct ethnl_dump_ctx {
	const struct ethnl_request_ops	*ops;
	struct ethnl_req_info		*req_info;
	struct ethnl_reply_data		*reply_data;
	int				pos_hash;
	int				pos_idx;
};
|
|
|
|
|
|
|
|
static const struct ethnl_request_ops *
|
|
|
|
ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
|
2019-12-27 22:55:43 +08:00
|
|
|
[ETHTOOL_MSG_STRSET_GET] = ðnl_strset_request_ops,
|
2019-12-27 22:55:48 +08:00
|
|
|
[ETHTOOL_MSG_LINKINFO_GET] = ðnl_linkinfo_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_LINKINFO_SET] = ðnl_linkinfo_request_ops,
|
2019-12-27 22:56:08 +08:00
|
|
|
[ETHTOOL_MSG_LINKMODES_GET] = ðnl_linkmodes_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_LINKMODES_SET] = ðnl_linkmodes_request_ops,
|
2019-12-27 22:56:23 +08:00
|
|
|
[ETHTOOL_MSG_LINKSTATE_GET] = ðnl_linkstate_request_ops,
|
2020-01-27 06:11:04 +08:00
|
|
|
[ETHTOOL_MSG_DEBUG_GET] = ðnl_debug_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_DEBUG_SET] = ðnl_debug_request_ops,
|
2020-01-27 06:11:13 +08:00
|
|
|
[ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_WOL_SET] = ðnl_wol_request_ops,
|
2020-03-13 04:07:48 +08:00
|
|
|
[ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops,
|
2020-03-13 04:08:08 +08:00
|
|
|
[ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_PRIVFLAGS_SET] = ðnl_privflags_request_ops,
|
2020-03-13 04:08:23 +08:00
|
|
|
[ETHTOOL_MSG_RINGS_GET] = ðnl_rings_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_RINGS_SET] = ðnl_rings_request_ops,
|
2020-03-13 04:08:38 +08:00
|
|
|
[ETHTOOL_MSG_CHANNELS_GET] = ðnl_channels_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_CHANNELS_SET] = ðnl_channels_request_ops,
|
2020-03-28 07:01:08 +08:00
|
|
|
[ETHTOOL_MSG_COALESCE_GET] = ðnl_coalesce_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_COALESCE_SET] = ðnl_coalesce_request_ops,
|
2020-03-28 07:01:23 +08:00
|
|
|
[ETHTOOL_MSG_PAUSE_GET] = ðnl_pause_request_ops,
|
2023-01-26 07:05:18 +08:00
|
|
|
[ETHTOOL_MSG_PAUSE_SET] = ðnl_pause_request_ops,
|
2020-03-28 07:01:38 +08:00
|
|
|
[ETHTOOL_MSG_EEE_GET] = ðnl_eee_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_EEE_SET] = ðnl_eee_request_ops,
|
2021-03-30 11:59:52 +08:00
|
|
|
[ETHTOOL_MSG_FEC_GET] = ðnl_fec_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_FEC_SET] = ðnl_fec_request_ops,
|
2020-03-28 07:01:58 +08:00
|
|
|
[ETHTOOL_MSG_TSINFO_GET] = ðnl_tsinfo_request_ops,
|
2021-04-09 16:06:34 +08:00
|
|
|
[ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops,
|
2021-04-17 03:27:39 +08:00
|
|
|
[ETHTOOL_MSG_STATS_GET] = ðnl_stats_request_ops,
|
2021-06-30 16:11:56 +08:00
|
|
|
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = ðnl_phc_vclocks_request_ops,
|
ethtool: Add ability to control transceiver modules' power mode
Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.
The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.
When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.
User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.
User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:
* high: Module is always in high power mode.
* auto: Module is transitioned by the host to high power mode when the
first port using it is put administratively up and to low power mode
when the last port using it is put administratively down.
The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.
The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).
The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.
CMIS testing
============
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).
The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp11 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp11 up
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp11 down
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
SFF-8636 testing
================
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7733 mW / -1.12 dBm
Transmit avg optical power (Channel 2) : 0.7649 mW / -1.16 dBm
Transmit avg optical power (Channel 3) : 0.7790 mW / -1.08 dBm
Transmit avg optical power (Channel 4) : 0.7837 mW / -1.06 dBm
Rcvr signal avg optical power(Channel 1) : 0.9302 mW / -0.31 dBm
Rcvr signal avg optical power(Channel 2) : 0.9079 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 3) : 0.8993 mW / -0.46 dBm
Rcvr signal avg optical power(Channel 4) : 0.8778 mW / -0.57 dBm
The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).
The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp13 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp13 up
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7934 mW / -1.01 dBm
Transmit avg optical power (Channel 2) : 0.7859 mW / -1.05 dBm
Transmit avg optical power (Channel 3) : 0.7885 mW / -1.03 dBm
Transmit avg optical power (Channel 4) : 0.7985 mW / -0.98 dBm
Rcvr signal avg optical power(Channel 1) : 0.9325 mW / -0.30 dBm
Rcvr signal avg optical power(Channel 2) : 0.9034 mW / -0.44 dBm
Rcvr signal avg optical power(Channel 3) : 0.9086 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 4) : 0.8885 mW / -0.51 dBm
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp13 down
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-06 18:46:42 +08:00
|
|
|
[ETHTOOL_MSG_MODULE_GET] = ðnl_module_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_MODULE_SET] = ðnl_module_request_ops,
|
2022-10-03 14:52:00 +08:00
|
|
|
[ETHTOOL_MSG_PSE_GET] = ðnl_pse_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_PSE_SET] = ðnl_pse_request_ops,
|
2022-12-02 08:25:55 +08:00
|
|
|
[ETHTOOL_MSG_RSS_GET] = ðnl_rss_request_ops,
|
2023-01-10 00:59:39 +08:00
|
|
|
[ETHTOOL_MSG_PLCA_GET_CFG] = ðnl_plca_cfg_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_PLCA_SET_CFG] = ðnl_plca_cfg_request_ops,
|
2023-01-10 00:59:39 +08:00
|
|
|
[ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops,
|
net: ethtool: add support for MAC Merge layer
The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2
specifications (the other being Frame Preemption; IEEE 802.1Q-2018
clause 6.7.2), which work together to minimize latency caused by frame
interference at TX. The overall goal of TSN is for normal traffic and
traffic with a bounded deadline to be able to cohabitate on the same L2
network and not bother each other too much.
The standards achieve this (partly) by introducing the concept of
preemptible traffic, i.e. Ethernet frames that have a custom value for
the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented
and reassembled at L2 on a link-local basis. The non-preemptible frames
are called express traffic, they are transmitted using a normal SFD, and
they can preempt preemptible frames, therefore having lower latency,
which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo
frames around 9K). Preemption is not recursive, i.e. a P frame cannot
preempt another P frame. Preemption also does not depend upon priority,
or otherwise said, an E frame with prio 0 will still preempt a P frame
with prio 7.
In terms of implementation, the standards talk about the presence of an
express MAC (eMAC) which handles express traffic, and a preemptible MAC
(pMAC) which handles preemptible traffic, and these MACs are multiplexed
on the same MII by a MAC merge layer.
To support frame preemption, the definition of the SFD was generalized
to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an
Ethernet frame fragment, or a complete frame. Stations unaware of an SMD
value different from the standard SFD will treat P frames as error
frames. To prevent that from happening, a negotiation process is
defined.
On RX, packets are dispatched to the eMAC or pMAC after being filtered
by their SMD. On TX, the eMAC/pMAC classification decision is taken by
the 802.1Q spec, based on packet priority (each of the 8 user priority
values may have an admin-status of preemptible or express).
The MAC Merge layer and the Frame Preemption parameters have some degree
of independence in terms of how software stacks are supposed to deal
with them. The activation of the MM layer is supposed to be controlled
by an LLDP daemon (after it has been communicated that the link partner
also supports it), after which a (hardware-based or not) verification
handshake takes place, before actually enabling the feature. So the
process is intended to be relatively plug-and-play. Whereas FP settings
are supposed to be coordinated across a network using something
approximating NETCONF.
The support contained here is exclusively for the 802.3 (MAC Merge)
portions and not for the 802.1Q (Frame Preemption) parts. This API is
sufficient for an LLDP daemon to do its job. The FP adminStatus variable
from 802.1Q is outside the scope of an LLDP daemon.
I have taken a few creative licenses and augmented the Linux kernel UAPI
compared to the standard managed objects recommended by IEEE 802.3.
These are:
- ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive
Processing state diagram, a MAC Merge layer is always supposed to be
able to receive P frames. However, this implies keeping the pMAC
powered on, which will consume needless power in applications where FP
will never be used. If LLDP is used, the reception of an Additional
Ethernet Capabilities TLV from the link partner is sufficient
indication that the pMAC should be enabled. So my proposal is that in
Linux, we keep the pMAC turned off by default and that user space
turns it on when needed.
- ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called
aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in
the boolean netlink attributes offered, so this is also positive here.
Other than the meaning being reversed, they correspond to the same
thing.
- ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for a LLDP
daemon to maximize the verifyTime variable (delay between SMD-V
transmissions), to maximize its chances that the LP replies. IEEE says
that the verifyTime can range between 1 and 128 ms, but the NXP ENETC
stupidly keeps this variable in a 7 bit register, so the maximum
supported value is 127 ms. I could have chosen to hardcode this in the
LLDP daemon to a lower value, but why not let the kernel expose its
supported range directly.
- ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called
aMACMergeAddFragSize, and expresses the "additional" fragment size
(on top of ETH_ZLEN), whereas this expresses the absolute value of the
fragment size.
- ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed
object mandated by the standard, but user space clearly needs to know
what is the minimum supported fragment size of our local receiver,
since LLDP must advertise a value no lower than that.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-01-19 20:26:54 +08:00
|
|
|
[ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops,
|
2023-01-26 07:05:19 +08:00
|
|
|
[ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops,
|
2019-12-27 22:55:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
return (struct ethnl_dump_ctx *)cb->ctx;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ethnl_default_parse() - Parse request message
|
|
|
|
* @req_info: pointer to structure to put data into
|
2020-10-06 06:07:33 +08:00
|
|
|
* @tb: parsed attributes
|
2019-12-27 22:55:38 +08:00
|
|
|
* @net: request netns
|
|
|
|
* @request_ops: struct request_ops for request type
|
|
|
|
* @extack: netlink extack for error reporting
|
|
|
|
* @require_dev: fail if no device identified in header
|
|
|
|
*
|
|
|
|
* Parse universal request header and call request specific ->parse_request()
|
|
|
|
* callback (if defined) to parse the rest of the message.
|
|
|
|
*
|
|
|
|
* Return: 0 on success or negative error code
|
|
|
|
*/
|
|
|
|
static int ethnl_default_parse(struct ethnl_req_info *req_info,
|
2020-10-06 06:07:33 +08:00
|
|
|
struct nlattr **tb, struct net *net,
|
2019-12-27 22:55:38 +08:00
|
|
|
const struct ethnl_request_ops *request_ops,
|
|
|
|
struct netlink_ext_ack *extack, bool require_dev)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2020-03-13 04:07:38 +08:00
|
|
|
ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr],
|
|
|
|
net, extack, require_dev);
|
2019-12-27 22:55:38 +08:00
|
|
|
if (ret < 0)
|
2020-10-06 06:07:33 +08:00
|
|
|
return ret;
|
2019-12-27 22:55:38 +08:00
|
|
|
|
|
|
|
if (request_ops->parse_request) {
|
|
|
|
ret = request_ops->parse_request(req_info, tb, extack);
|
|
|
|
if (ret < 0)
|
2020-10-06 06:07:33 +08:00
|
|
|
return ret;
|
2019-12-27 22:55:38 +08:00
|
|
|
}
|
|
|
|
|
2020-10-06 06:07:33 +08:00
|
|
|
return 0;
|
2019-12-27 22:55:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ethnl_init_reply_data() - Initialize reply data for GET request
|
2020-01-27 06:11:01 +08:00
|
|
|
* @reply_data: pointer to embedded struct ethnl_reply_data
|
|
|
|
* @ops: instance of struct ethnl_request_ops describing the layout
|
|
|
|
* @dev: network device to initialize the reply for
|
2019-12-27 22:55:38 +08:00
|
|
|
*
|
|
|
|
* Fills the reply data part with zeros and sets the dev member. Must be called
|
|
|
|
* before calling the ->fill_reply() callback (for each iteration when handling
|
|
|
|
* dump requests).
|
|
|
|
*/
|
|
|
|
static void ethnl_init_reply_data(struct ethnl_reply_data *reply_data,
|
|
|
|
const struct ethnl_request_ops *ops,
|
|
|
|
struct net_device *dev)
|
|
|
|
{
|
|
|
|
memset(reply_data, 0, ops->reply_data_size);
|
|
|
|
reply_data->dev = dev;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* default ->doit() handler for GET type requests */
|
|
|
|
static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
|
|
|
|
{
|
|
|
|
struct ethnl_reply_data *reply_data = NULL;
|
|
|
|
struct ethnl_req_info *req_info = NULL;
|
|
|
|
const u8 cmd = info->genlhdr->cmd;
|
|
|
|
const struct ethnl_request_ops *ops;
|
2021-06-16 11:33:38 +08:00
|
|
|
int hdr_len, reply_len;
|
2019-12-27 22:55:38 +08:00
|
|
|
struct sk_buff *rskb;
|
|
|
|
void *reply_payload;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ops = ethnl_default_requests[cmd];
|
|
|
|
if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
|
|
|
|
return -EOPNOTSUPP;
|
2022-08-26 11:09:35 +08:00
|
|
|
if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2019-12-27 22:55:38 +08:00
|
|
|
req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
|
|
|
|
if (!req_info)
|
|
|
|
return -ENOMEM;
|
|
|
|
reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
|
|
|
|
if (!reply_data) {
|
|
|
|
kfree(req_info);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2020-10-06 06:07:33 +08:00
|
|
|
ret = ethnl_default_parse(req_info, info->attrs, genl_info_net(info),
|
|
|
|
ops, info->extack, !ops->allow_nodev_do);
|
2019-12-27 22:55:38 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto err_dev;
|
|
|
|
ethnl_init_reply_data(reply_data, ops, req_info->dev);
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
ret = ops->prepare_data(req_info, reply_data, info);
|
|
|
|
rtnl_unlock();
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_cleanup;
|
2020-01-08 13:41:25 +08:00
|
|
|
ret = ops->reply_size(req_info, reply_data);
|
2019-12-27 22:55:38 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto err_cleanup;
|
2021-06-16 11:33:38 +08:00
|
|
|
reply_len = ret;
|
2019-12-27 22:55:38 +08:00
|
|
|
ret = -ENOMEM;
|
2021-06-16 11:33:38 +08:00
|
|
|
rskb = ethnl_reply_init(reply_len + ethnl_reply_header_size(),
|
|
|
|
req_info->dev, ops->reply_cmd,
|
2019-12-27 22:55:38 +08:00
|
|
|
ops->hdr_attr, info, &reply_payload);
|
|
|
|
if (!rskb)
|
|
|
|
goto err_cleanup;
|
2021-06-16 11:33:38 +08:00
|
|
|
hdr_len = rskb->len;
|
2019-12-27 22:55:38 +08:00
|
|
|
ret = ops->fill_reply(rskb, req_info, reply_data);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_msg;
|
2021-06-16 11:33:38 +08:00
|
|
|
WARN_ONCE(rskb->len - hdr_len > reply_len,
|
|
|
|
"ethnl cmd %d: calculated reply length %d, but consumed %d\n",
|
|
|
|
cmd, reply_len, rskb->len - hdr_len);
|
2019-12-27 22:55:38 +08:00
|
|
|
if (ops->cleanup_data)
|
|
|
|
ops->cleanup_data(reply_data);
|
|
|
|
|
|
|
|
genlmsg_end(rskb, reply_payload);
|
2022-06-08 12:39:55 +08:00
|
|
|
netdev_put(req_info->dev, &req_info->dev_tracker);
|
2019-12-27 22:55:38 +08:00
|
|
|
kfree(reply_data);
|
|
|
|
kfree(req_info);
|
|
|
|
return genlmsg_reply(rskb, info);
|
|
|
|
|
|
|
|
err_msg:
|
|
|
|
WARN_ONCE(ret == -EMSGSIZE, "calculated message payload length (%d) not sufficient\n", reply_len);
|
|
|
|
nlmsg_free(rskb);
|
|
|
|
err_cleanup:
|
|
|
|
if (ops->cleanup_data)
|
|
|
|
ops->cleanup_data(reply_data);
|
|
|
|
err_dev:
|
2022-06-08 12:39:55 +08:00
|
|
|
netdev_put(req_info->dev, &req_info->dev_tracker);
|
2019-12-27 22:55:38 +08:00
|
|
|
kfree(reply_data);
|
|
|
|
kfree(req_info);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
|
2020-07-09 18:11:50 +08:00
|
|
|
const struct ethnl_dump_ctx *ctx,
|
|
|
|
struct netlink_callback *cb)
|
2019-12-27 22:55:38 +08:00
|
|
|
{
|
2020-07-09 18:11:50 +08:00
|
|
|
void *ehdr;
|
2019-12-27 22:55:38 +08:00
|
|
|
int ret;
|
|
|
|
|
2020-07-09 18:11:50 +08:00
|
|
|
ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
2021-05-05 06:47:14 +08:00
|
|
|
ðtool_genl_family, NLM_F_MULTI,
|
|
|
|
ctx->ops->reply_cmd);
|
2020-07-09 18:11:50 +08:00
|
|
|
if (!ehdr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2019-12-27 22:55:38 +08:00
|
|
|
ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
|
|
|
|
rtnl_lock();
|
|
|
|
ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
|
|
|
|
rtnl_unlock();
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
ret = ctx->ops->fill_reply(skb, ctx->req_info, ctx->reply_data);
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (ctx->ops->cleanup_data)
|
|
|
|
ctx->ops->cleanup_data(ctx->reply_data);
|
|
|
|
ctx->reply_data->dev = NULL;
|
2020-07-09 18:11:50 +08:00
|
|
|
if (ret < 0)
|
|
|
|
genlmsg_cancel(skb, ehdr);
|
|
|
|
else
|
|
|
|
genlmsg_end(skb, ehdr);
|
2019-12-27 22:55:38 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Default ->dumpit() handler for GET requests. Device iteration copied from
|
|
|
|
* rtnl_dump_ifinfo(); we have to be more careful about device hashtable
|
|
|
|
* persistence as we cannot guarantee to hold RTNL lock through the whole
|
|
|
|
* function as rtnetnlink does.
|
|
|
|
*/
|
|
|
|
static int ethnl_default_dumpit(struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
|
|
|
|
struct net *net = sock_net(skb->sk);
|
|
|
|
int s_idx = ctx->pos_idx;
|
|
|
|
int h, idx = 0;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
|
|
|
|
struct hlist_head *head;
|
|
|
|
struct net_device *dev;
|
|
|
|
unsigned int seq;
|
|
|
|
|
|
|
|
head = &net->dev_index_head[h];
|
|
|
|
|
|
|
|
restart_chain:
|
|
|
|
seq = net->dev_base_seq;
|
|
|
|
cb->seq = seq;
|
|
|
|
idx = 0;
|
|
|
|
hlist_for_each_entry(dev, head, index_hlist) {
|
|
|
|
if (idx < s_idx)
|
|
|
|
goto cont;
|
|
|
|
dev_hold(dev);
|
|
|
|
rtnl_unlock();
|
|
|
|
|
2020-07-09 18:11:50 +08:00
|
|
|
ret = ethnl_default_dump_one(skb, dev, ctx, cb);
|
2019-12-27 22:55:38 +08:00
|
|
|
dev_put(dev);
|
|
|
|
if (ret < 0) {
|
|
|
|
if (ret == -EOPNOTSUPP)
|
|
|
|
goto lock_and_cont;
|
|
|
|
if (likely(skb->len))
|
|
|
|
ret = skb->len;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
lock_and_cont:
|
|
|
|
rtnl_lock();
|
|
|
|
if (net->dev_base_seq != seq) {
|
|
|
|
s_idx = idx + 1;
|
|
|
|
goto restart_chain;
|
|
|
|
}
|
|
|
|
cont:
|
|
|
|
idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
rtnl_unlock();
|
|
|
|
|
|
|
|
out:
|
|
|
|
ctx->pos_hash = h;
|
|
|
|
ctx->pos_idx = idx;
|
|
|
|
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* generic ->start() handler for GET requests */
|
|
|
|
static int ethnl_default_start(struct netlink_callback *cb)
|
|
|
|
{
|
2020-10-06 06:07:33 +08:00
|
|
|
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
|
2019-12-27 22:55:38 +08:00
|
|
|
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
|
|
|
|
struct ethnl_reply_data *reply_data;
|
|
|
|
const struct ethnl_request_ops *ops;
|
|
|
|
struct ethnl_req_info *req_info;
|
|
|
|
struct genlmsghdr *ghdr;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
|
|
|
|
|
|
|
|
ghdr = nlmsg_data(cb->nlh);
|
|
|
|
ops = ethnl_default_requests[ghdr->cmd];
|
|
|
|
if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", ghdr->cmd))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
|
|
|
|
if (!req_info)
|
|
|
|
return -ENOMEM;
|
|
|
|
reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
|
|
|
|
if (!reply_data) {
|
2020-01-08 13:39:48 +08:00
|
|
|
ret = -ENOMEM;
|
|
|
|
goto free_req_info;
|
2019-12-27 22:55:38 +08:00
|
|
|
}
|
|
|
|
|
2020-10-06 06:07:33 +08:00
|
|
|
ret = ethnl_default_parse(req_info, info->attrs, sock_net(cb->skb->sk),
|
|
|
|
ops, cb->extack, false);
|
2019-12-27 22:55:38 +08:00
|
|
|
if (req_info->dev) {
|
|
|
|
/* We ignore device specification in dump requests but as the
|
|
|
|
* same parser as for non-dump (doit) requests is used, it
|
|
|
|
* would take reference to the device if it finds one
|
|
|
|
*/
|
2022-06-08 12:39:55 +08:00
|
|
|
netdev_put(req_info->dev, &req_info->dev_tracker);
|
2019-12-27 22:55:38 +08:00
|
|
|
req_info->dev = NULL;
|
|
|
|
}
|
|
|
|
if (ret < 0)
|
2020-01-08 13:39:48 +08:00
|
|
|
goto free_reply_data;
|
2019-12-27 22:55:38 +08:00
|
|
|
|
|
|
|
ctx->ops = ops;
|
|
|
|
ctx->req_info = req_info;
|
|
|
|
ctx->reply_data = reply_data;
|
|
|
|
ctx->pos_hash = 0;
|
|
|
|
ctx->pos_idx = 0;
|
|
|
|
|
|
|
|
return 0;
|
2020-01-08 13:39:48 +08:00
|
|
|
|
|
|
|
free_reply_data:
|
|
|
|
kfree(reply_data);
|
|
|
|
free_req_info:
|
|
|
|
kfree(req_info);
|
|
|
|
|
|
|
|
return ret;
|
2019-12-27 22:55:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* default ->done() handler for GET requests */
|
|
|
|
static int ethnl_default_done(struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
|
|
|
|
|
|
|
|
kfree(ctx->reply_data);
|
|
|
|
kfree(ctx->req_info);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-01-26 07:05:18 +08:00
|
|
|
static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info)
|
|
|
|
{
|
|
|
|
const struct ethnl_request_ops *ops;
|
|
|
|
struct ethnl_req_info req_info = {};
|
|
|
|
const u8 cmd = info->genlhdr->cmd;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ops = ethnl_default_requests[cmd];
|
|
|
|
if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
ret = ethnl_parse_header_dev_get(&req_info, info->attrs[ops->hdr_attr],
|
|
|
|
genl_info_net(info), info->extack,
|
|
|
|
true);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (ops->set_validate) {
|
|
|
|
ret = ops->set_validate(&req_info, info);
|
|
|
|
/* 0 means nothing to do */
|
|
|
|
if (ret <= 0)
|
|
|
|
goto out_dev;
|
|
|
|
}
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
ret = ethnl_ops_begin(req_info.dev);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out_rtnl;
|
|
|
|
|
|
|
|
ret = ops->set(&req_info, info);
|
|
|
|
if (ret <= 0)
|
|
|
|
goto out_ops;
|
|
|
|
ethtool_notify(req_info.dev, ops->set_ntf_cmd, NULL);
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
out_ops:
|
|
|
|
ethnl_ops_complete(req_info.dev);
|
|
|
|
out_rtnl:
|
|
|
|
rtnl_unlock();
|
|
|
|
out_dev:
|
|
|
|
ethnl_parse_header_dev_put(&req_info);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-12-27 22:55:58 +08:00
|
|
|
static const struct ethnl_request_ops *
|
|
|
|
ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
|
2019-12-27 22:56:03 +08:00
|
|
|
[ETHTOOL_MSG_LINKINFO_NTF] = ðnl_linkinfo_request_ops,
|
2019-12-27 22:56:18 +08:00
|
|
|
[ETHTOOL_MSG_LINKMODES_NTF] = ðnl_linkmodes_request_ops,
|
2020-01-27 06:11:10 +08:00
|
|
|
[ETHTOOL_MSG_DEBUG_NTF] = ðnl_debug_request_ops,
|
2020-01-27 06:11:19 +08:00
|
|
|
[ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops,
|
2020-03-13 04:08:03 +08:00
|
|
|
[ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops,
|
2020-03-13 04:08:18 +08:00
|
|
|
[ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops,
|
2020-03-13 04:08:33 +08:00
|
|
|
[ETHTOOL_MSG_RINGS_NTF] = ðnl_rings_request_ops,
|
2020-03-13 04:08:48 +08:00
|
|
|
[ETHTOOL_MSG_CHANNELS_NTF] = ðnl_channels_request_ops,
|
2020-03-28 07:01:18 +08:00
|
|
|
[ETHTOOL_MSG_COALESCE_NTF] = ðnl_coalesce_request_ops,
|
2020-03-28 07:01:33 +08:00
|
|
|
[ETHTOOL_MSG_PAUSE_NTF] = ðnl_pause_request_ops,
|
2020-03-28 07:01:48 +08:00
|
|
|
[ETHTOOL_MSG_EEE_NTF] = ðnl_eee_request_ops,
|
2021-03-30 11:59:52 +08:00
|
|
|
[ETHTOOL_MSG_FEC_NTF] = ðnl_fec_request_ops,
|
ethtool: Add ability to control transceiver modules' power mode
Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.
The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.
When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.
User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.
User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:
* high: Module is always in high power mode.
* auto: Module is transitioned by the host to high power mode when the
first port using it is put administratively up and to low power mode
when the last port using it is put administratively down.
The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.
The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).
The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.
CMIS testing
============
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).
The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp11 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp11 up
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp11 down
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
SFF-8636 testing
================
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7733 mW / -1.12 dBm
Transmit avg optical power (Channel 2) : 0.7649 mW / -1.16 dBm
Transmit avg optical power (Channel 3) : 0.7790 mW / -1.08 dBm
Transmit avg optical power (Channel 4) : 0.7837 mW / -1.06 dBm
Rcvr signal avg optical power(Channel 1) : 0.9302 mW / -0.31 dBm
Rcvr signal avg optical power(Channel 2) : 0.9079 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 3) : 0.8993 mW / -0.46 dBm
Rcvr signal avg optical power(Channel 4) : 0.8778 mW / -0.57 dBm
The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).
The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp13 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp13 up
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7934 mW / -1.01 dBm
Transmit avg optical power (Channel 2) : 0.7859 mW / -1.05 dBm
Transmit avg optical power (Channel 3) : 0.7885 mW / -1.03 dBm
Transmit avg optical power (Channel 4) : 0.7985 mW / -0.98 dBm
Rcvr signal avg optical power(Channel 1) : 0.9325 mW / -0.30 dBm
Rcvr signal avg optical power(Channel 2) : 0.9034 mW / -0.44 dBm
Rcvr signal avg optical power(Channel 3) : 0.9086 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 4) : 0.8885 mW / -0.51 dBm
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp13 down
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-06 18:46:42 +08:00
|
|
|
[ETHTOOL_MSG_MODULE_NTF] = ðnl_module_request_ops,
|
2023-01-10 00:59:39 +08:00
|
|
|
[ETHTOOL_MSG_PLCA_NTF] = ðnl_plca_cfg_request_ops,
|
net: ethtool: add support for MAC Merge layer
The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2
specifications (the other being Frame Preemption; IEEE 802.1Q-2018
clause 6.7.2), which work together to minimize latency caused by frame
interference at TX. The overall goal of TSN is for normal traffic and
traffic with a bounded deadline to be able to cohabitate on the same L2
network and not bother each other too much.
The standards achieve this (partly) by introducing the concept of
preemptible traffic, i.e. Ethernet frames that have a custom value for
the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented
and reassembled at L2 on a link-local basis. The non-preemptible frames
are called express traffic, they are transmitted using a normal SFD, and
they can preempt preemptible frames, therefore having lower latency,
which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo
frames around 9K). Preemption is not recursive, i.e. a P frame cannot
preempt another P frame. Preemption also does not depend upon priority,
or otherwise said, an E frame with prio 0 will still preempt a P frame
with prio 7.
In terms of implementation, the standards talk about the presence of an
express MAC (eMAC) which handles express traffic, and a preemptible MAC
(pMAC) which handles preemptible traffic, and these MACs are multiplexed
on the same MII by a MAC merge layer.
To support frame preemption, the definition of the SFD was generalized
to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an
Ethernet frame fragment, or a complete frame. Stations unaware of an SMD
value different from the standard SFD will treat P frames as error
frames. To prevent that from happening, a negotiation process is
defined.
On RX, packets are dispatched to the eMAC or pMAC after being filtered
by their SMD. On TX, the eMAC/pMAC classification decision is taken by
the 802.1Q spec, based on packet priority (each of the 8 user priority
values may have an admin-status of preemptible or express).
The MAC Merge layer and the Frame Preemption parameters have some degree
of independence in terms of how software stacks are supposed to deal
with them. The activation of the MM layer is supposed to be controlled
by an LLDP daemon (after it has been communicated that the link partner
also supports it), after which a (hardware-based or not) verification
handshake takes place, before actually enabling the feature. So the
process is intended to be relatively plug-and-play. Whereas FP settings
are supposed to be coordinated across a network using something
approximating NETCONF.
The support contained here is exclusively for the 802.3 (MAC Merge)
portions and not for the 802.1Q (Frame Preemption) parts. This API is
sufficient for an LLDP daemon to do its job. The FP adminStatus variable
from 802.1Q is outside the scope of an LLDP daemon.
I have taken a few creative licenses and augmented the Linux kernel UAPI
compared to the standard managed objects recommended by IEEE 802.3.
These are:
- ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive
Processing state diagram, a MAC Merge layer is always supposed to be
able to receive P frames. However, this implies keeping the pMAC
powered on, which will consume needless power in applications where FP
will never be used. If LLDP is used, the reception of an Additional
Ethernet Capabilities TLV from the link partner is sufficient
indication that the pMAC should be enabled. So my proposal is that in
Linux, we keep the pMAC turned off by default and that user space
turns it on when needed.
- ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called
aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in
the boolean netlink attributes offered, so this is also positive here.
Other than the meaning being reversed, they correspond to the same
thing.
- ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for a LLDP
daemon to maximize the verifyTime variable (delay between SMD-V
transmissions), to maximize its chances that the LP replies. IEEE says
that the verifyTime can range between 1 and 128 ms, but the NXP ENETC
stupidly keeps this variable in a 7 bit register, so the maximum
supported value is 127 ms. I could have chosen to hardcode this in the
LLDP daemon to a lower value, but why not let the kernel expose its
supported range directly.
- ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called
aMACMergeAddFragSize, and expresses the "additional" fragment size
(on top of ETH_ZLEN), whereas this expresses the absolute value of the
fragment size.
- ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed
object mandated by the standard, but user space clearly needs to know
what is the minimum supported fragment size of our local receiver,
since LLDP must advertise a value no lower than that.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-01-19 20:26:54 +08:00
|
|
|
[ETHTOOL_MSG_MM_NTF] = ðnl_mm_request_ops,
|
2019-12-27 22:55:58 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* default notification handler */
|
|
|
|
static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
|
|
|
|
const void *data)
|
|
|
|
{
|
|
|
|
struct ethnl_reply_data *reply_data;
|
|
|
|
const struct ethnl_request_ops *ops;
|
|
|
|
struct ethnl_req_info *req_info;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
void *reply_payload;
|
|
|
|
int reply_len;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX ||
|
|
|
|
!ethnl_default_notify_ops[cmd],
|
|
|
|
"unexpected notification type %u\n", cmd))
|
|
|
|
return;
|
|
|
|
ops = ethnl_default_notify_ops[cmd];
|
|
|
|
req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
|
|
|
|
if (!req_info)
|
|
|
|
return;
|
|
|
|
reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
|
|
|
|
if (!reply_data) {
|
|
|
|
kfree(req_info);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
req_info->dev = dev;
|
|
|
|
req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
|
|
|
|
|
|
|
|
ethnl_init_reply_data(reply_data, ops, dev);
|
|
|
|
ret = ops->prepare_data(req_info, reply_data, NULL);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_cleanup;
|
2020-01-08 13:41:25 +08:00
|
|
|
ret = ops->reply_size(req_info, reply_data);
|
2019-12-27 22:55:58 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto err_cleanup;
|
2020-05-11 03:04:09 +08:00
|
|
|
reply_len = ret + ethnl_reply_header_size();
|
2019-12-27 22:55:58 +08:00
|
|
|
skb = genlmsg_new(reply_len, GFP_KERNEL);
|
|
|
|
if (!skb)
|
|
|
|
goto err_cleanup;
|
|
|
|
reply_payload = ethnl_bcastmsg_put(skb, cmd);
|
|
|
|
if (!reply_payload)
|
|
|
|
goto err_skb;
|
|
|
|
ret = ethnl_fill_reply_header(skb, dev, ops->hdr_attr);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_msg;
|
|
|
|
ret = ops->fill_reply(skb, req_info, reply_data);
|
|
|
|
if (ret < 0)
|
|
|
|
goto err_msg;
|
|
|
|
if (ops->cleanup_data)
|
|
|
|
ops->cleanup_data(reply_data);
|
|
|
|
|
|
|
|
genlmsg_end(skb, reply_payload);
|
|
|
|
kfree(reply_data);
|
|
|
|
kfree(req_info);
|
|
|
|
ethnl_multicast(skb, dev);
|
|
|
|
return;
|
|
|
|
|
|
|
|
err_msg:
|
|
|
|
WARN_ONCE(ret == -EMSGSIZE,
|
|
|
|
"calculated message payload length (%d) not sufficient\n",
|
|
|
|
reply_len);
|
|
|
|
err_skb:
|
|
|
|
nlmsg_free(skb);
|
|
|
|
err_cleanup:
|
|
|
|
if (ops->cleanup_data)
|
|
|
|
ops->cleanup_data(reply_data);
|
|
|
|
kfree(reply_data);
|
|
|
|
kfree(req_info);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-12-27 22:55:33 +08:00
|
|
|
/* notifications */
|
|
|
|
|
|
|
|
/* Signature of a notification handler: the device the notification is about,
 * the notification message type and optional handler specific data.
 */
typedef void (*ethnl_notify_handler_t)(struct net_device *dev, unsigned int cmd,
				       const void *data);
|
|
|
|
|
|
|
|
/*
 * Dispatch table indexed by notification message type (ETHTOOL_MSG_*_NTF).
 * ethtool_notify() looks up the handler for a given cmd here; indices without
 * an explicit initializer are NULL and are reported there as "not
 * implemented". Every entry currently listed uses ethnl_default_notify.
 */
static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
|
2019-12-27 22:56:03 +08:00
|
|
|
[ETHTOOL_MSG_LINKINFO_NTF] = ethnl_default_notify,
|
2019-12-27 22:56:18 +08:00
|
|
|
[ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify,
|
2020-01-27 06:11:10 +08:00
|
|
|
[ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify,
|
2020-01-27 06:11:19 +08:00
|
|
|
[ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify,
|
2020-03-13 04:08:03 +08:00
|
|
|
[ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify,
|
2020-03-13 04:08:18 +08:00
|
|
|
[ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify,
|
2020-03-13 04:08:33 +08:00
|
|
|
[ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify,
|
2020-03-13 04:08:48 +08:00
|
|
|
[ETHTOOL_MSG_CHANNELS_NTF] = ethnl_default_notify,
|
2020-03-28 07:01:18 +08:00
|
|
|
[ETHTOOL_MSG_COALESCE_NTF] = ethnl_default_notify,
|
2020-03-28 07:01:33 +08:00
|
|
|
[ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify,
|
2020-03-28 07:01:48 +08:00
|
|
|
[ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify,
|
2021-03-30 11:59:52 +08:00
|
|
|
[ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify,
|
ethtool: Add ability to control transceiver modules' power mode
Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.
The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.
When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.
User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.
User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:
* high: Module is always in high power mode.
* auto: Module is transitioned by the host to high power mode when the
first port using it is put administratively up and to low power mode
when the last port using it is put administratively down.
The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.
The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).
The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.
CMIS testing
============
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).
The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp11 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp11 up
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp11 down
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
SFF-8636 testing
================
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7733 mW / -1.12 dBm
Transmit avg optical power (Channel 2) : 0.7649 mW / -1.16 dBm
Transmit avg optical power (Channel 3) : 0.7790 mW / -1.08 dBm
Transmit avg optical power (Channel 4) : 0.7837 mW / -1.06 dBm
Rcvr signal avg optical power(Channel 1) : 0.9302 mW / -0.31 dBm
Rcvr signal avg optical power(Channel 2) : 0.9079 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 3) : 0.8993 mW / -0.46 dBm
Rcvr signal avg optical power(Channel 4) : 0.8778 mW / -0.57 dBm
The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).
The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp13 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp13 up
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7934 mW / -1.01 dBm
Transmit avg optical power (Channel 2) : 0.7859 mW / -1.05 dBm
Transmit avg optical power (Channel 3) : 0.7885 mW / -1.03 dBm
Transmit avg optical power (Channel 4) : 0.7985 mW / -0.98 dBm
Rcvr signal avg optical power(Channel 1) : 0.9325 mW / -0.30 dBm
Rcvr signal avg optical power(Channel 2) : 0.9034 mW / -0.44 dBm
Rcvr signal avg optical power(Channel 3) : 0.9086 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 4) : 0.8885 mW / -0.51 dBm
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp13 down
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-06 18:46:42 +08:00
|
|
|
[ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify,
|
2023-01-10 00:59:39 +08:00
|
|
|
[ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify,
|
net: ethtool: add support for MAC Merge layer
The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2
specifications (the other being Frame Preemption; IEEE 802.1Q-2018
clause 6.7.2), which work together to minimize latency caused by frame
interference at TX. The overall goal of TSN is for normal traffic and
traffic with a bounded deadline to be able to cohabitate on the same L2
network and not bother each other too much.
The standards achieve this (partly) by introducing the concept of
preemptible traffic, i.e. Ethernet frames that have a custom value for
the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented
and reassembled at L2 on a link-local basis. The non-preemptible frames
are called express traffic, they are transmitted using a normal SFD, and
they can preempt preemptible frames, therefore having lower latency,
which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo
frames around 9K). Preemption is not recursive, i.e. a P frame cannot
preempt another P frame. Preemption also does not depend upon priority,
or otherwise said, an E frame with prio 0 will still preempt a P frame
with prio 7.
In terms of implementation, the standards talk about the presence of an
express MAC (eMAC) which handles express traffic, and a preemptible MAC
(pMAC) which handles preemptible traffic, and these MACs are multiplexed
on the same MII by a MAC merge layer.
To support frame preemption, the definition of the SFD was generalized
to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an
Ethernet frame fragment, or a complete frame. Stations unaware of an SMD
value different from the standard SFD will treat P frames as error
frames. To prevent that from happening, a negotiation process is
defined.
On RX, packets are dispatched to the eMAC or pMAC after being filtered
by their SMD. On TX, the eMAC/pMAC classification decision is taken by
the 802.1Q spec, based on packet priority (each of the 8 user priority
values may have an admin-status of preemptible or express).
The MAC Merge layer and the Frame Preemption parameters have some degree
of independence in terms of how software stacks are supposed to deal
with them. The activation of the MM layer is supposed to be controlled
by an LLDP daemon (after it has been communicated that the link partner
also supports it), after which a (hardware-based or not) verification
handshake takes place, before actually enabling the feature. So the
process is intended to be relatively plug-and-play. Whereas FP settings
are supposed to be coordinated across a network using something
approximating NETCONF.
The support contained here is exclusively for the 802.3 (MAC Merge)
portions and not for the 802.1Q (Frame Preemption) parts. This API is
sufficient for an LLDP daemon to do its job. The FP adminStatus variable
from 802.1Q is outside the scope of an LLDP daemon.
I have taken a few creative licenses and augmented the Linux kernel UAPI
compared to the standard managed objects recommended by IEEE 802.3.
These are:
- ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive
Processing state diagram, a MAC Merge layer is always supposed to be
able to receive P frames. However, this implies keeping the pMAC
powered on, which will consume needless power in applications where FP
will never be used. If LLDP is used, the reception of an Additional
Ethernet Capabilities TLV from the link partner is sufficient
indication that the pMAC should be enabled. So my proposal is that in
Linux, we keep the pMAC turned off by default and that user space
turns it on when needed.
- ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called
aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in
the boolean netlink attributes offered, so this is also positive here.
Other than the meaning being reversed, they correspond to the same
thing.
- ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for a LLDP
daemon to maximize the verifyTime variable (delay between SMD-V
transmissions), to maximize its chances that the LP replies. IEEE says
that the verifyTime can range between 1 and 128 ms, but the NXP ENETC
stupidly keeps this variable in a 7 bit register, so the maximum
supported value is 127 ms. I could have chosen to hardcode this in the
LLDP daemon to a lower value, but why not let the kernel expose its
supported range directly.
- ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called
aMACMergeAddFragSize, and expresses the "additional" fragment size
(on top of ETH_ZLEN), whereas this expresses the absolute value of the
fragment size.
- ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed
object mandated by the standard, but user space clearly needs to know
what is the minimum supported fragment size of our local receiver,
since LLDP must advertise a value no lower than that.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-01-19 20:26:54 +08:00
|
|
|
[ETHTOOL_MSG_MM_NTF] = ethnl_default_notify,
|
2019-12-27 22:55:33 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
|
|
|
|
{
|
|
|
|
if (unlikely(!ethnl_ok))
|
|
|
|
return;
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
|
|
|
if (likely(cmd < ARRAY_SIZE(ethnl_notify_handlers) &&
|
|
|
|
ethnl_notify_handlers[cmd]))
|
|
|
|
ethnl_notify_handlers[cmd](dev, cmd, data);
|
|
|
|
else
|
|
|
|
WARN_ONCE(1, "notification %u not implemented (dev=%s)\n",
|
|
|
|
cmd, netdev_name(dev));
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ethtool_notify);
|
|
|
|
|
2020-03-13 04:08:03 +08:00
|
|
|
static void ethnl_notify_features(struct netdev_notifier_info *info)
|
|
|
|
{
|
|
|
|
struct net_device *dev = netdev_notifier_info_to_dev(info);
|
|
|
|
|
|
|
|
ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ethnl_netdev_event(struct notifier_block *this, unsigned long event,
|
|
|
|
void *ptr)
|
|
|
|
{
|
|
|
|
switch (event) {
|
|
|
|
case NETDEV_FEAT_CHANGE:
|
|
|
|
ethnl_notify_features(ptr);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Notifier block whose callback is ethnl_netdev_event().
 * NOTE(review): presumably registered as a netdevice notifier during
 * subsystem init; the registration is not visible in this part of the file.
 */
static struct notifier_block ethnl_netdev_notifier = {
|
|
|
|
.notifier_call = ethnl_netdev_event,
|
|
|
|
};
|
|
|
|
|
2019-12-27 22:55:18 +08:00
|
|
|
/* genetlink setup */
|
|
|
|
|
|
|
|
static const struct genl_ops ethtool_genl_ops[] = {
|
2019-12-27 22:55:43 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_STRSET_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_strset_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_strset_get_policy) - 1,
|
2019-12-27 22:55:43 +08:00
|
|
|
},
|
2019-12-27 22:55:48 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_LINKINFO_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_linkinfo_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_linkinfo_get_policy) - 1,
|
2019-12-27 22:55:48 +08:00
|
|
|
},
|
2019-12-27 22:55:53 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_LINKINFO_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_linkinfo_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_linkinfo_set_policy) - 1,
|
2019-12-27 22:55:53 +08:00
|
|
|
},
|
2019-12-27 22:56:08 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_LINKMODES_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_linkmodes_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_linkmodes_get_policy) - 1,
|
2019-12-27 22:56:08 +08:00
|
|
|
},
|
2019-12-27 22:56:13 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_LINKMODES_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_linkmodes_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_linkmodes_set_policy) - 1,
|
2019-12-27 22:56:13 +08:00
|
|
|
},
|
2019-12-27 22:56:23 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_LINKSTATE_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_linkstate_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_linkstate_get_policy) - 1,
|
2019-12-27 22:56:23 +08:00
|
|
|
},
|
2020-01-27 06:11:04 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_DEBUG_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_debug_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_debug_get_policy) - 1,
|
2020-01-27 06:11:04 +08:00
|
|
|
},
|
2020-01-27 06:11:07 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_DEBUG_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_debug_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_debug_set_policy) - 1,
|
2020-01-27 06:11:07 +08:00
|
|
|
},
|
2020-01-27 06:11:13 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_WOL_GET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_wol_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_wol_get_policy) - 1,
|
2020-01-27 06:11:13 +08:00
|
|
|
},
|
2020-01-27 06:11:16 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_WOL_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_wol_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_wol_set_policy) - 1,
|
2020-01-27 06:11:16 +08:00
|
|
|
},
|
2020-03-13 04:07:48 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_FEATURES_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_features_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_features_get_policy) - 1,
|
2020-03-13 04:07:48 +08:00
|
|
|
},
|
2020-03-13 04:07:58 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_FEATURES_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
|
|
|
.doit = ethnl_set_features,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_features_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_features_set_policy) - 1,
|
2020-03-13 04:07:58 +08:00
|
|
|
},
|
2020-03-13 04:08:08 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PRIVFLAGS_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_privflags_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_privflags_get_policy) - 1,
|
2020-03-13 04:08:08 +08:00
|
|
|
},
|
2020-03-13 04:08:13 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PRIVFLAGS_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_privflags_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_privflags_set_policy) - 1,
|
2020-03-13 04:08:13 +08:00
|
|
|
},
|
2020-03-13 04:08:23 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_RINGS_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_rings_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_rings_get_policy) - 1,
|
2020-03-13 04:08:23 +08:00
|
|
|
},
|
2020-03-13 04:08:28 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_RINGS_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_rings_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_rings_set_policy) - 1,
|
2020-03-13 04:08:28 +08:00
|
|
|
},
|
2020-03-13 04:08:38 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_CHANNELS_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_channels_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_channels_get_policy) - 1,
|
2020-03-13 04:08:38 +08:00
|
|
|
},
|
2020-03-13 04:08:43 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_CHANNELS_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-07 18:53:51 +08:00
|
|
|
.policy = ethnl_channels_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_channels_set_policy) - 1,
|
2020-03-13 04:08:43 +08:00
|
|
|
},
|
2020-03-28 07:01:08 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_COALESCE_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_coalesce_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_coalesce_get_policy) - 1,
|
2020-03-28 07:01:08 +08:00
|
|
|
},
|
2020-03-28 07:01:13 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_COALESCE_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_coalesce_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_coalesce_set_policy) - 1,
|
2020-03-28 07:01:13 +08:00
|
|
|
},
|
2020-03-28 07:01:23 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PAUSE_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_pause_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_pause_get_policy) - 1,
|
2020-03-28 07:01:23 +08:00
|
|
|
},
|
2020-03-28 07:01:28 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PAUSE_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:18 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_pause_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_pause_set_policy) - 1,
|
2020-03-28 07:01:28 +08:00
|
|
|
},
|
2020-03-28 07:01:38 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_EEE_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_eee_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_eee_get_policy) - 1,
|
2020-03-28 07:01:38 +08:00
|
|
|
},
|
2020-03-28 07:01:43 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_EEE_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_eee_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_eee_set_policy) - 1,
|
2020-03-28 07:01:43 +08:00
|
|
|
},
|
2020-03-28 07:01:58 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_TSINFO_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_tsinfo_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1,
|
2020-03-28 07:01:58 +08:00
|
|
|
},
|
2020-05-11 03:12:33 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_CABLE_TEST_ACT,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
|
|
|
.doit = ethnl_act_cable_test,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_cable_test_act_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_cable_test_act_policy) - 1,
|
2020-05-11 03:12:33 +08:00
|
|
|
},
|
2020-05-27 06:21:38 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
|
|
|
.doit = ethnl_act_cable_test_tdr,
|
2020-10-06 06:07:34 +08:00
|
|
|
.policy = ethnl_cable_test_tdr_act_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_cable_test_tdr_act_policy) - 1,
|
2020-05-27 06:21:38 +08:00
|
|
|
},
|
2020-07-10 08:42:47 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_TUNNEL_INFO_GET,
|
|
|
|
.doit = ethnl_tunnel_info_doit,
|
|
|
|
.start = ethnl_tunnel_info_start,
|
|
|
|
.dumpit = ethnl_tunnel_info_dumpit,
|
2020-10-06 06:07:33 +08:00
|
|
|
.policy = ethnl_tunnel_info_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
|
2020-07-10 08:42:47 +08:00
|
|
|
},
|
2021-03-30 11:59:52 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_FEC_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_fec_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_fec_get_policy) - 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_FEC_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2021-03-30 11:59:52 +08:00
|
|
|
.policy = ethnl_fec_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1,
|
|
|
|
},
|
2021-04-09 16:06:34 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_MODULE_EEPROM_GET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_module_eeprom_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1,
|
|
|
|
},
|
2021-04-17 03:27:39 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_STATS_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_stats_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1,
|
|
|
|
},
|
2021-06-30 16:11:56 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_phc_vclocks_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1,
|
|
|
|
},
|
ethtool: Add ability to control transceiver modules' power mode
Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.
The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.
When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.
User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.
User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:
* high: Module is always in high power mode.
* auto: Module is transitioned by the host to high power mode when the
first port using it is put administratively up and to low power mode
when the last port using it is put administratively down.
The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.
The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).
The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.
CMIS testing
============
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).
The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp11 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp11 up
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp11 down
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
SFF-8636 testing
================
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7733 mW / -1.12 dBm
Transmit avg optical power (Channel 2) : 0.7649 mW / -1.16 dBm
Transmit avg optical power (Channel 3) : 0.7790 mW / -1.08 dBm
Transmit avg optical power (Channel 4) : 0.7837 mW / -1.06 dBm
Rcvr signal avg optical power(Channel 1) : 0.9302 mW / -0.31 dBm
Rcvr signal avg optical power(Channel 2) : 0.9079 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 3) : 0.8993 mW / -0.46 dBm
Rcvr signal avg optical power(Channel 4) : 0.8778 mW / -0.57 dBm
The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).
The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp13 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp13 up
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7934 mW / -1.01 dBm
Transmit avg optical power (Channel 2) : 0.7859 mW / -1.05 dBm
Transmit avg optical power (Channel 3) : 0.7885 mW / -1.03 dBm
Transmit avg optical power (Channel 4) : 0.7985 mW / -0.98 dBm
Rcvr signal avg optical power(Channel 1) : 0.9325 mW / -0.30 dBm
Rcvr signal avg optical power(Channel 2) : 0.9034 mW / -0.44 dBm
Rcvr signal avg optical power(Channel 3) : 0.9086 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 4) : 0.8885 mW / -0.51 dBm
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp13 down
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-06 18:46:42 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_MODULE_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_module_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_module_get_policy) - 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_MODULE_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
ethtool: Add ability to control transceiver modules' power mode
Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.
The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.
When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.
User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.
User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:
* high: Module is always in high power mode.
* auto: Module is transitioned by the host to high power mode when the
first port using it is put administratively up and to low power mode
when the last port using it is put administratively down.
The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.
The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).
The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.
CMIS testing
============
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).
The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp11 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp11 up
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x03 (ModuleReady)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : Off
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp11 down
Query the power mode again:
$ ethtool --show-module swp11
Module parameters for swp11:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp11
Identifier : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
...
Module State : 0x01 (ModuleLowPwr)
LowPwrAllowRequestHW : Off
LowPwrRequestSW : On
SFF-8636 testing
================
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7733 mW / -1.12 dBm
Transmit avg optical power (Channel 2) : 0.7649 mW / -1.16 dBm
Transmit avg optical power (Channel 3) : 0.7790 mW / -1.08 dBm
Transmit avg optical power (Channel 4) : 0.7837 mW / -1.06 dBm
Rcvr signal avg optical power(Channel 1) : 0.9302 mW / -0.31 dBm
Rcvr signal avg optical power(Channel 2) : 0.9079 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 3) : 0.8993 mW / -0.46 dBm
Rcvr signal avg optical power(Channel 4) : 0.8778 mW / -0.57 dBm
The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).
The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy high
power-mode high
Change the power mode policy to 'auto':
# ethtool --set-module swp13 power-mode-policy auto
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Put the associated port administratively up which will instruct the host
to transition the module to high power mode:
# ip link set dev swp13 up
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode high
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) enabled
Power set : Off
Power override : On
...
Transmit avg optical power (Channel 1) : 0.7934 mW / -1.01 dBm
Transmit avg optical power (Channel 2) : 0.7859 mW / -1.05 dBm
Transmit avg optical power (Channel 3) : 0.7885 mW / -1.03 dBm
Transmit avg optical power (Channel 4) : 0.7985 mW / -0.98 dBm
Rcvr signal avg optical power(Channel 1) : 0.9325 mW / -0.30 dBm
Rcvr signal avg optical power(Channel 2) : 0.9034 mW / -0.44 dBm
Rcvr signal avg optical power(Channel 3) : 0.9086 mW / -0.42 dBm
Rcvr signal avg optical power(Channel 4) : 0.8885 mW / -0.51 dBm
Put the associated port administratively down which will instruct the
host to transition the module to low power mode:
# ip link set dev swp13 down
Query the power mode again:
$ ethtool --show-module swp13
Module parameters for swp13:
power-mode-policy auto
power-mode low
Verify with the data read from the EEPROM:
# ethtool -m swp13
Identifier : 0x11 (QSFP28)
...
Extended identifier description : 5.0W max. Power consumption, High Power Class (> 3.5 W) not enabled
Power set : On
Power override : On
...
Transmit avg optical power (Channel 1) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 2) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 3) : 0.0000 mW / -inf dBm
Transmit avg optical power (Channel 4) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 1) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 2) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 3) : 0.0000 mW / -inf dBm
Rcvr signal avg optical power(Channel 4) : 0.0000 mW / -inf dBm
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-06 18:46:42 +08:00
|
|
|
.policy = ethnl_module_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1,
|
|
|
|
},
|
2022-10-03 14:52:00 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PSE_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_pse_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_pse_get_policy) - 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PSE_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2022-10-03 14:52:00 +08:00
|
|
|
.policy = ethnl_pse_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
|
|
|
|
},
|
2022-12-02 08:25:55 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_RSS_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.policy = ethnl_rss_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
|
|
|
|
},
|
2023-01-10 00:59:39 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PLCA_GET_CFG,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_plca_get_cfg_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_plca_get_cfg_policy) - 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PLCA_SET_CFG,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
2023-01-10 00:59:39 +08:00
|
|
|
.policy = ethnl_plca_set_cfg_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_plca_set_cfg_policy) - 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_PLCA_GET_STATUS,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_plca_get_status_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_plca_get_status_policy) - 1,
|
|
|
|
},
|
net: ethtool: add support for MAC Merge layer
The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2
specifications (the other being Frame Preemption; IEEE 802.1Q-2018
clause 6.7.2), which work together to minimize latency caused by frame
interference at TX. The overall goal of TSN is for normal traffic and
traffic with a bounded deadline to be able to cohabitate on the same L2
network and not bother each other too much.
The standards achieve this (partly) by introducing the concept of
preemptible traffic, i.e. Ethernet frames that have a custom value for
the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented
and reassembled at L2 on a link-local basis. The non-preemptible frames
are called express traffic, they are transmitted using a normal SFD, and
they can preempt preemptible frames, therefore having lower latency,
which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo
frames around 9K). Preemption is not recursive, i.e. a P frame cannot
preempt another P frame. Preemption also does not depend upon priority,
or otherwise said, an E frame with prio 0 will still preempt a P frame
with prio 7.
In terms of implementation, the standards talk about the presence of an
express MAC (eMAC) which handles express traffic, and a preemptible MAC
(pMAC) which handles preemptible traffic, and these MACs are multiplexed
on the same MII by a MAC merge layer.
To support frame preemption, the definition of the SFD was generalized
to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an
Ethernet frame fragment, or a complete frame. Stations unaware of an SMD
value different from the standard SFD will treat P frames as error
frames. To prevent that from happening, a negotiation process is
defined.
On RX, packets are dispatched to the eMAC or pMAC after being filtered
by their SMD. On TX, the eMAC/pMAC classification decision is taken by
the 802.1Q spec, based on packet priority (each of the 8 user priority
values may have an admin-status of preemptible or express).
The MAC Merge layer and the Frame Preemption parameters have some degree
of independence in terms of how software stacks are supposed to deal
with them. The activation of the MM layer is supposed to be controlled
by an LLDP daemon (after it has been communicated that the link partner
also supports it), after which a (hardware-based or not) verification
handshake takes place, before actually enabling the feature. So the
process is intended to be relatively plug-and-play. Whereas FP settings
are supposed to be coordinated across a network using something
approximating NETCONF.
The support contained here is exclusively for the 802.3 (MAC Merge)
portions and not for the 802.1Q (Frame Preemption) parts. This API is
sufficient for an LLDP daemon to do its job. The FP adminStatus variable
from 802.1Q is outside the scope of an LLDP daemon.
I have taken a few creative licenses and augmented the Linux kernel UAPI
compared to the standard managed objects recommended by IEEE 802.3.
These are:
- ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive
Processing state diagram, a MAC Merge layer is always supposed to be
able to receive P frames. However, this implies keeping the pMAC
powered on, which will consume needless power in applications where FP
will never be used. If LLDP is used, the reception of an Additional
Ethernet Capabilities TLV from the link partner is sufficient
indication that the pMAC should be enabled. So my proposal is that in
Linux, we keep the pMAC turned off by default and that user space
turns it on when needed.
- ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called
aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in
the boolean netlink attributes offered, so this is also positive here.
Other than the meaning being reversed, they correspond to the same
thing.
- ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for a LLDP
daemon to maximize the verifyTime variable (delay between SMD-V
transmissions), to maximize its chances that the LP replies. IEEE says
that the verifyTime can range between 1 and 128 ms, but the NXP ENETC
stupidly keeps this variable in a 7 bit register, so the maximum
supported value is 127 ms. I could have chosen to hardcode this in the
LLDP daemon to a lower value, but why not let the kernel expose its
supported range directly.
- ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called
aMACMergeAddFragSize, and expresses the "additional" fragment size
(on top of ETH_ZLEN), whereas this expresses the absolute value of the
fragment size.
- ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed
object mandated by the standard, but user space clearly needs to know
what is the minimum supported fragment size of our local receiver,
since LLDP must advertise a value no lower than that.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-01-19 20:26:54 +08:00
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_MM_GET,
|
|
|
|
.doit = ethnl_default_doit,
|
|
|
|
.start = ethnl_default_start,
|
|
|
|
.dumpit = ethnl_default_dumpit,
|
|
|
|
.done = ethnl_default_done,
|
|
|
|
.policy = ethnl_mm_get_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_mm_get_policy) - 1,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.cmd = ETHTOOL_MSG_MM_SET,
|
|
|
|
.flags = GENL_UNS_ADMIN_PERM,
|
2023-01-26 07:05:19 +08:00
|
|
|
.doit = ethnl_default_set_doit,
|
net: ethtool: add support for MAC Merge layer
The MAC merge sublayer (IEEE 802.3-2018 clause 99) is one of 2
specifications (the other being Frame Preemption; IEEE 802.1Q-2018
clause 6.7.2), which work together to minimize latency caused by frame
interference at TX. The overall goal of TSN is for normal traffic and
traffic with a bounded deadline to be able to cohabitate on the same L2
network and not bother each other too much.
The standards achieve this (partly) by introducing the concept of
preemptible traffic, i.e. Ethernet frames that have a custom value for
the Start-of-Frame-Delimiter (SFD), and these frames can be fragmented
and reassembled at L2 on a link-local basis. The non-preemptible frames
are called express traffic, they are transmitted using a normal SFD, and
they can preempt preemptible frames, therefore having lower latency,
which can matter at lower (100 Mbps) link speeds, or at high MTUs (jumbo
frames around 9K). Preemption is not recursive, i.e. a P frame cannot
preempt another P frame. Preemption also does not depend upon priority,
or otherwise said, an E frame with prio 0 will still preempt a P frame
with prio 7.
In terms of implementation, the standards talk about the presence of an
express MAC (eMAC) which handles express traffic, and a preemptible MAC
(pMAC) which handles preemptible traffic, and these MACs are multiplexed
on the same MII by a MAC merge layer.
To support frame preemption, the definition of the SFD was generalized
to SMD (Start-of-mPacket-Delimiter), where an mPacket is essentially an
Ethernet frame fragment, or a complete frame. Stations unaware of an SMD
value different from the standard SFD will treat P frames as error
frames. To prevent that from happening, a negotiation process is
defined.
On RX, packets are dispatched to the eMAC or pMAC after being filtered
by their SMD. On TX, the eMAC/pMAC classification decision is taken by
the 802.1Q spec, based on packet priority (each of the 8 user priority
values may have an admin-status of preemptible or express).
The MAC Merge layer and the Frame Preemption parameters have some degree
of independence in terms of how software stacks are supposed to deal
with them. The activation of the MM layer is supposed to be controlled
by an LLDP daemon (after it has been communicated that the link partner
also supports it), after which a (hardware-based or not) verification
handshake takes place, before actually enabling the feature. So the
process is intended to be relatively plug-and-play. Whereas FP settings
are supposed to be coordinated across a network using something
approximating NETCONF.
The support contained here is exclusively for the 802.3 (MAC Merge)
portions and not for the 802.1Q (Frame Preemption) parts. This API is
sufficient for an LLDP daemon to do its job. The FP adminStatus variable
from 802.1Q is outside the scope of an LLDP daemon.
I have taken a few creative licenses and augmented the Linux kernel UAPI
compared to the standard managed objects recommended by IEEE 802.3.
These are:
- ETHTOOL_A_MM_PMAC_ENABLED: According to Figure 99-6: Receive
Processing state diagram, a MAC Merge layer is always supposed to be
able to receive P frames. However, this implies keeping the pMAC
powered on, which will consume needless power in applications where FP
will never be used. If LLDP is used, the reception of an Additional
Ethernet Capabilities TLV from the link partner is sufficient
indication that the pMAC should be enabled. So my proposal is that in
Linux, we keep the pMAC turned off by default and that user space
turns it on when needed.
- ETHTOOL_A_MM_VERIFY_ENABLED: The IEEE managed object is called
aMACMergeVerifyDisableTx. I opted for consistency (positive logic) in
the boolean netlink attributes offered, so this is also positive here.
Other than the meaning being reversed, they correspond to the same
thing.
- ETHTOOL_A_MM_MAX_VERIFY_TIME: I found it most reasonable for a LLDP
daemon to maximize the verifyTime variable (delay between SMD-V
transmissions), to maximize its chances that the LP replies. IEEE says
that the verifyTime can range between 1 and 128 ms, but the NXP ENETC
stupidly keeps this variable in a 7 bit register, so the maximum
supported value is 127 ms. I could have chosen to hardcode this in the
LLDP daemon to a lower value, but why not let the kernel expose its
supported range directly.
- ETHTOOL_A_MM_TX_MIN_FRAG_SIZE: the standard managed object is called
aMACMergeAddFragSize, and expresses the "additional" fragment size
(on top of ETH_ZLEN), whereas this expresses the absolute value of the
fragment size.
- ETHTOOL_A_MM_RX_MIN_FRAG_SIZE: there doesn't appear to exist a managed
object mandated by the standard, but user space clearly needs to know
what is the minimum supported fragment size of our local receiver,
since LLDP must advertise a value no lower than that.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-01-19 20:26:54 +08:00
|
|
|
.policy = ethnl_mm_set_policy,
|
|
|
|
.maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1,
|
|
|
|
},
|
2019-12-27 22:55:18 +08:00
|
|
|
};
|
|
|
|
|
2019-12-27 22:55:33 +08:00
|
|
|
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
|
|
|
|
[ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME },
|
|
|
|
};
|
|
|
|
|
2020-09-29 08:58:41 +08:00
|
|
|
/*
 * Generic netlink family descriptor for ethtool.
 *
 * Ties together the family name/version, the request handler table
 * (ethtool_genl_ops) and the multicast group table (ethtool_nl_mcgrps).
 * It is passed to genl_register_family() from ethnl_init().
 */
static struct genl_family ethtool_genl_family __ro_after_init = {
	.name		= ETHTOOL_GENL_NAME,
	.version	= ETHTOOL_GENL_VERSION,
	.netnsok	= true,
	.parallel_ops	= true,
	/* request handlers and their count */
	.ops		= ethtool_genl_ops,
	.n_ops		= ARRAY_SIZE(ethtool_genl_ops),
	/*
	 * NOTE(review): presumably the first command subject to genetlink's
	 * stricter handling of reserved fields - confirm against the
	 * resv_start_op documentation in include/net/genetlink.h.
	 */
	.resv_start_op	= ETHTOOL_MSG_MODULE_GET + 1,
	/* multicast groups and their count */
	.mcgrps		= ethtool_nl_mcgrps,
	.n_mcgrps	= ARRAY_SIZE(ethtool_nl_mcgrps),
};
|
|
|
|
|
|
|
|
/* module setup */
|
|
|
|
|
|
|
|
/*
 * Initcall that brings up the ethtool netlink interface.
 *
 * Registers the ethtool generic netlink family first and, only if that
 * succeeded, the net device notifier.  ethnl_ok is set as soon as the
 * family has been registered.
 *
 * Returns the negative error from genl_register_family() if the family
 * could not be registered, otherwise the return value of
 * register_netdevice_notifier().  Both failures are reported via WARN().
 * Note that the family is not unregistered here when the notifier
 * registration fails.
 */
static int __init ethnl_init(void)
{
	int ret;

	ret = genl_register_family(&ethtool_genl_family);
	if (WARN(ret < 0, "ethtool: genetlink family registration failed"))
		return ret;
	/* family is registered from this point on */
	ethnl_ok = true;

	ret = register_netdevice_notifier(&ethnl_netdev_notifier);
	WARN(ret < 0, "ethtool: net device notifier registration failed");
	return ret;
}
|
|
|
|
|
|
|
|
subsys_initcall(ethnl_init);
|