Add devlink_eswitch_mode= command line support for setting a default eswitch mode during device initialization. The supported syntax selects either all devlink handles or one explicit comma-separated handle list: devlink_eswitch_mode=[*]:<mode> devlink_eswitch_mode=[<handle>[,<handle>...]]:<mode> where <mode> is one of legacy, switchdev or switchdev_inactive. All selected handles receive the same mode. Assigning different modes to different handle lists in the same parameter value is not supported. The default is applied through the existing eswitch_mode_set() devlink operation, matching the userspace devlink eswitch mode command. devlink core applies it when a matching devlink instance is registered and after a successful devlink reload that performed DRIVER_REINIT, so rebuilt device state returns to the requested boot default. Document the devlink_eswitch_mode= syntax and duplicate handle handling. Signed-off-by: Mark Bloch --- .../admin-guide/kernel-parameters.txt | 25 ++ .../networking/devlink/devlink-defaults.rst | 80 ++++++ Documentation/networking/devlink/index.rst | 1 + net/devlink/core.c | 261 ++++++++++++++++++ net/devlink/dev.c | 3 + net/devlink/devl_internal.h | 1 + 6 files changed, 371 insertions(+) create mode 100644 Documentation/networking/devlink/devlink-defaults.rst diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 063c11ca33e5..7af9f2898d92 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1264,6 +1264,31 @@ Kernel parameters dell_smm_hwmon.fan_max= [HW] Maximum configurable fan speed. + devlink_eswitch_mode= + [NET] + Format: + [<handles>]:<mode> + + <handles>: + * | <handle>[,<handle>...] + + <handle>: + <bus-name>/<dev-name> + + Configure default devlink eswitch mode for matching + devlink instances during device initialization. 
+ + <mode>: + legacy | switchdev | switchdev_inactive + + Examples: + devlink_eswitch_mode=[*]:switchdev + devlink_eswitch_mode=[pci/0000:08:00.0]:switchdev + devlink_eswitch_mode=[pci/0000:08:00.0,pci/0000:09:00.1]:legacy + + See Documentation/networking/devlink/devlink-defaults.rst + for the full syntax. + dfltcc= [HW,S390] Format: { on | off | def_only | inf_only | always } on: s390 zlib hardware support for compression on diff --git a/Documentation/networking/devlink/devlink-defaults.rst b/Documentation/networking/devlink/devlink-defaults.rst new file mode 100644 index 000000000000..b554e75eeeea --- /dev/null +++ b/Documentation/networking/devlink/devlink-defaults.rst @@ -0,0 +1,80 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +Devlink Eswitch Mode Defaults +============================== + +Devlink eswitch mode defaults allow the eswitch mode to be provided on the +kernel command line and applied to matching devlink instances during device +initialization. + +The devlink device is selected by its devlink handle. For PCI devices this is +the same handle shown by ``devlink dev show``, for example +``pci/0000:08:00.0``. + +Kernel command line syntax +========================== + +Defaults are specified with the ``devlink_eswitch_mode=`` kernel command line +parameter. + +The general syntax is:: + + devlink_eswitch_mode=[<handles>]:<mode> + +``<handles>`` is either ``*`` or one or more devlink handles:: + + * | <bus-name>/<dev-name>[,<bus-name>/<dev-name>...] + +``*`` applies the mode to every devlink instance. All handles in the same +``[]`` list receive the same eswitch mode. + +``<mode>`` is one of ``legacy``, ``switchdev`` or ``switchdev_inactive``. + +Syntax rules +------------ + +The following syntax rules apply: + +* Specify the default in one ``devlink_eswitch_mode=`` parameter. Repeated + ``devlink_eswitch_mode=`` parameters are not accumulated. +* The ``devlink_eswitch_mode=`` value is limited by the kernel command line + size. +* Whitespace is not allowed within the parameter value. 
+* ``<handles>`` must be either ``*`` or a handle list. ``*`` cannot be + combined with explicit handles. +* ``<bus-name>`` and ``<dev-name>`` must not be empty. +* ``<bus-name>`` must not contain ``:``. +* ``<dev-name>`` may contain ``:``. This allows PCI names such as + ``0000:08:00.0``. +* Handles must not contain whitespace, ``[``, ``]``, ``*`` or more than one + ``/``. +* A comma inside ``[]`` separates handles. +* Comma-separated default groups are not supported. +* Duplicate handles are rejected and the devlink eswitch mode default is + ignored. + +The eswitch mode default corresponds to the userspace command:: + + devlink dev eswitch set <handle> mode <mode> + + +Examples +======== + +Set all devlink instances to switchdev mode:: + + devlink_eswitch_mode=[*]:switchdev + +Set one PCI devlink instance to switchdev mode:: + + devlink_eswitch_mode=[pci/0000:08:00.0]:switchdev + +Set two PCI devlink instances to legacy mode:: + + devlink_eswitch_mode=[pci/0000:08:00.0,pci/0000:09:00.1]:legacy + +The following is invalid because comma-separated default groups are not +supported:: + + devlink_eswitch_mode=[pci/0000:08:00.0]:switchdev,[pci/0000:09:00.0]:switchdev_inactive diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index f7ba7dcf477d..0d27a7008b14 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -56,6 +56,7 @@ general. 
:maxdepth: 1 devlink-dpipe + devlink-defaults devlink-eswitch-attr devlink-flash devlink-health diff --git a/net/devlink/core.c b/net/devlink/core.c index eeb6a71f5f56..3e1b26459894 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -4,6 +4,10 @@ * Copyright (c) 2016 Jiri Pirko */ +#include +#include +#include +#include #include #define CREATE_TRACE_POINTS #include @@ -16,6 +20,234 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +static char *devlink_default_esw_mode_param; +static bool devlink_default_esw_mode_match_all; +static enum devlink_eswitch_mode devlink_default_esw_mode; +static LIST_HEAD(devlink_default_esw_mode_nodes); + +struct devlink_default_esw_mode_node { + struct list_head list; + char *bus_name; + char *dev_name; +}; + +static int __init +devlink_default_esw_mode_to_value(const char *str, + enum devlink_eswitch_mode *mode) +{ + if (!strcmp(str, "legacy")) { + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; + } + if (!strcmp(str, "switchdev")) { + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + return 0; + } + if (!strcmp(str, "switchdev_inactive")) { + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE; + return 0; + } + + return -EINVAL; +} + +static int __init +devlink_default_esw_mode_handle_parse(char *handle, char **bus_name, + char **dev_name) +{ + char *slash; + char *p; + + if (!handle || !*handle) + return -EINVAL; + + for (p = handle; *p; p++) { + if (*p == '[' || *p == ']' || *p == '*') + return -EINVAL; + } + + slash = strchr(handle, '/'); + if (!slash || slash == handle || !slash[1]) + return -EINVAL; + if (strchr(slash + 1, '/')) + return -EINVAL; + + *slash = '\0'; + if (strchr(handle, ':')) + return -EINVAL; + + *bus_name = handle; + *dev_name = slash + 1; + return 0; +} + +static struct devlink_default_esw_mode_node * +devlink_default_esw_mode_node_find(const char *bus_name, const char *dev_name) +{ + struct devlink_default_esw_mode_node *node; + + list_for_each_entry(node, 
&devlink_default_esw_mode_nodes, list) { + if (!strcmp(node->bus_name, bus_name) && + !strcmp(node->dev_name, dev_name)) + return node; + } + + return NULL; +} + +static int __init +devlink_default_esw_mode_node_add(const char *bus_name, const char *dev_name) +{ + struct devlink_default_esw_mode_node *node; + + if (devlink_default_esw_mode_node_find(bus_name, dev_name)) + return -EEXIST; + + node = kzalloc_obj(*node); + if (!node) + return -ENOMEM; + + INIT_LIST_HEAD(&node->list); + node->bus_name = kstrdup(bus_name, GFP_KERNEL); + node->dev_name = kstrdup(dev_name, GFP_KERNEL); + if (!node->bus_name || !node->dev_name) { + kfree(node->bus_name); + kfree(node->dev_name); + kfree(node); + return -ENOMEM; + } + + list_add_tail(&node->list, &devlink_default_esw_mode_nodes); + return 0; +} + +static int __init devlink_default_esw_mode_handles_parse(char *handles) +{ + char *handle; + int err; + + if (!strcmp(handles, "*")) { + devlink_default_esw_mode_match_all = true; + return 0; + } + + while ((handle = strsep(&handles, ",")) != NULL) { + char *bus_name; + char *dev_name; + + err = devlink_default_esw_mode_handle_parse(handle, &bus_name, + &dev_name); + if (err) + return err; + + err = devlink_default_esw_mode_node_add(bus_name, dev_name); + if (err) + return err; + } + + return 0; +} + +static void __init +devlink_default_esw_mode_node_free(struct devlink_default_esw_mode_node *node) +{ + kfree(node->bus_name); + kfree(node->dev_name); + kfree(node); +} + +static void __init devlink_default_esw_mode_nodes_clear(void) +{ + struct devlink_default_esw_mode_node *node; + struct devlink_default_esw_mode_node *node_tmp; + + list_for_each_entry_safe(node, node_tmp, + &devlink_default_esw_mode_nodes, list) { + list_del(&node->list); + devlink_default_esw_mode_node_free(node); + } + + devlink_default_esw_mode_match_all = false; +} + +static int __init devlink_default_esw_mode_parse(char *str) +{ + char *handles_end; + char *handles; + char *mode; + enum devlink_eswitch_mode 
esw_mode; + int err; + + if (!str || *str != '[') + return -EINVAL; + + handles = str + 1; + handles_end = strchr(handles, ']'); + if (!handles_end || handles_end[1] != ':' || !handles_end[2]) + return -EINVAL; + + *handles_end = '\0'; + mode = handles_end + 2; + if (!*handles) + return -EINVAL; + + err = devlink_default_esw_mode_to_value(mode, &esw_mode); + if (err) + return err; + + err = devlink_default_esw_mode_handles_parse(handles); + if (err) + devlink_default_esw_mode_nodes_clear(); + else + devlink_default_esw_mode = esw_mode; + + return err; +} + +static bool devlink_default_esw_mode_match(struct devlink *devlink) +{ + const char *bus_name = devlink_bus_name(devlink); + const char *dev_name = devlink_dev_name(devlink); + struct devlink_default_esw_mode_node *node; + + if (devlink_default_esw_mode_match_all) + return true; + + node = devlink_default_esw_mode_node_find(bus_name, dev_name); + return !!node; +} + +void devlink_apply_default_esw_mode(struct devlink *devlink) +{ + const struct devlink_ops *ops = devlink->ops; + int err; + + devl_assert_locked(devlink); + + if (!devlink_default_esw_mode_match(devlink)) + return; + + if (!ops->eswitch_mode_set) { + if (!devlink_default_esw_mode_match_all) + devl_warn(devlink, + "devlink_eswitch_mode= selected this device but eswitch mode setting is not supported\n"); + return; + } + + err = ops->eswitch_mode_set(devlink, devlink_default_esw_mode, NULL); + if (err) + devl_warn(devlink, + "Couldn't apply default eswitch mode, err %d\n", + err); +} + +static int __init devlink_default_esw_mode_setup(char *str) +{ + devlink_default_esw_mode_param = str; + return 1; +} +__setup("devlink_eswitch_mode=", devlink_default_esw_mode_setup); + static struct devlink *devlinks_xa_get(unsigned long index) { struct devlink *devlink; @@ -391,6 +623,7 @@ int devl_register(struct devlink *devlink) xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); devlink_rel_nested_in_notify(devlink); + 
devlink_apply_default_esw_mode(devlink); return 0; } @@ -578,6 +811,31 @@ static int __init devlink_init(void) { int err; + if (devlink_default_esw_mode_param) { + char *def; + + def = kstrdup(devlink_default_esw_mode_param, GFP_KERNEL); + if (!def) { + devlink_default_esw_mode_param = NULL; + pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate memory\n"); + } else { + err = devlink_default_esw_mode_parse(def); + kfree(def); + if (err == -EEXIST) { + devlink_default_esw_mode_param = NULL; + pr_warn("devlink: duplicate eswitch mode handles ignored\n"); + } else if (err == -EINVAL) { + devlink_default_esw_mode_param = NULL; + pr_warn("devlink: invalid devlink_eswitch_mode parameter ignored\n"); + } else if (err == -ENOMEM) { + devlink_default_esw_mode_param = NULL; + pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate memory\n"); + } else if (err) { + goto out; + } + } + } + err = register_pernet_subsys(&devlink_pernet_ops); if (err) goto out; @@ -593,7 +851,10 @@ static int __init devlink_init(void) out_unreg_pernet_subsys: unregister_pernet_subsys(&devlink_pernet_ops); out: + if (err) + devlink_default_esw_mode_nodes_clear(); WARN_ON(err); + return err; } diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 57b2b8f03543..0b4a831465e8 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -478,6 +478,9 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, return err; WARN_ON(!(*actions_performed & BIT(action))); + if (*actions_performed & BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT)) + devlink_apply_default_esw_mode(devlink); + /* Catch driver on updating the remote action within devlink reload */ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats))); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index e4e48ee2da5a..12557b65248d 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -71,6 +71,7 @@ extern 
struct genl_family devlink_nl_family; struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, struct net *net, struct device *dev, const struct device_driver *dev_driver); +void devlink_apply_default_esw_mode(struct devlink *devlink); #define devl_warn(devlink, format, args...) \ do { \ -- 2.34.1