diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index 1b7e32d8a61b..936a10f1942c 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -183,6 +183,40 @@ User command examples: values: cmode driverinit value true +esw_port_metadata: Eswitch port metadata state +---------------------------------------------- +When applicable, disabling Eswitch metadata can increase packet rate +up to 20% depending on the use case and packet sizes. + +Eswitch port metadata state controls whether to internally tag packets with +metadata. Metadata tagging must be enabled for multi-port RoCE, failover +between representors and stacked devices. +By default metadata is enabled on the supported devices in E-switch. +Metadata is applicable only for E-switch in switchdev mode and +users may disable it when NONE of the below use cases will be in use: +1. HCA is in Dual/multi-port RoCE mode. +2. VF/SF representor bonding (Usually used for Live migration) +3. Stacked devices + +When metadata is disabled, the above use cases will fail to initialize if +users try to enable them. + +- Show eswitch port metadata:: + + $ devlink dev param show pci/0000:06:00.0 name esw_port_metadata + pci/0000:06:00.0: + name esw_port_metadata type driver-specific + values: + cmode runtime value true + +- Disable eswitch port metadata:: + + $ devlink dev param set pci/0000:06:00.0 name esw_port_metadata value false cmode runtime + +- Change eswitch mode to switchdev mode where after choosing the metadata value:: + + $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev + mlx5 subfunction ================ mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 38c7c44fe883..ee0f5355f120 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -456,6 +456,50 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } + +static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool); +} + +static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + return 0; +} + +static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 esw_mode; + + if (!MLX5_ESWITCH_MANAGER(dev)) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); + return -EOPNOTSUPP; + } + esw_mode = mlx5_eswitch_mode(dev); + if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must either disabled or non switchdev mode"); + return -EBUSY; + } + return 0; +} + #endif static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, @@ -490,6 +534,12 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_port_metadata_get, + mlx5_devlink_esw_port_metadata_set, + mlx5_devlink_esw_port_metadata_validate), #endif DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_enable_remote_dev_reset_get, @@ -519,6 +569,18 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) devlink_param_driverinit_value_set(devlink, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, value); + + if (MLX5_ESWITCH_MANAGER(dev)) { + if (mlx5_esw_vport_match_metadata_supported(dev->priv.eswitch)) { + dev->priv.eswitch->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + value.vbool = true; + } else { + value.vbool = false; + } + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + value); + } #endif } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index eff107dad922..7318d44b774b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -10,6 +10,7 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index deafb0e03787..c7a73dbd64b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -314,6 +314,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ab32f685cbb7..1f58e84bdfc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2351,8 +2351,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -static bool -esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) { if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) return false; @@ -2452,6 +2451,28 @@ static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) return err; } +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) +{ + int err = 0; + + down_write(&esw->mode_lock); + if (esw->mode != MLX5_ESWITCH_NONE) { + err = -EBUSY; + goto done; + } + if (!mlx5_esw_vport_match_metadata_supported(esw)) { + err = -EOPNOTSUPP; + goto done; + } + if (enable) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + else + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +done: + up_write(&esw->mode_lock); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -2673,9 +2694,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_metadata; - if (esw_check_vport_match_metadata_supported(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - err = esw_offloads_metadata_init(esw); if (err) goto err_metadata; @@ -2725,7 +2743,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); return err; @@ -2761,7 +2778,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_steering_cleanup(esw); mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;