summaryrefslogtreecommitdiff
path: root/drivers/net/mlx5/mlx5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/mlx5/mlx5.c')
-rw-r--r--drivers/net/mlx5/mlx5.c317
1 files changed, 225 insertions, 92 deletions
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9d0b129..9dd74ec 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -130,6 +130,16 @@ static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
/** Driver-specific log messages type. */
int mlx5_logtype;
+/** Data associated with devices to spawn. */
+struct mlx5_dev_spawn_data {
+ uint32_t ifindex; /**< Network interface index. */
+ uint32_t max_port; /**< IB device maximal port index. */
+ uint32_t ibv_port; /**< IB device physical port index. */
+ struct mlx5_switch_info info; /**< Switch information. */
+ struct ibv_device *ibv_dev; /**< Associated IB device. */
+ struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
+};
+
/**
* Prepare shared data between primary and secondary process.
*/
@@ -716,12 +726,10 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev)
*
* @param dpdk_dev
* Backing DPDK device.
- * @param ibv_dev
- * Verbs device.
+ * @param spawn
+ * Verbs device parameters (name, port, switch_info) to spawn.
* @param config
* Device configuration parameters.
- * @param[in] switch_info
- * Switch properties of Ethernet device.
*
* @return
* A valid Ethernet device object on success, NULL otherwise and rte_errno
@@ -732,10 +740,11 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev)
*/
static struct rte_eth_dev *
mlx5_dev_spawn(struct rte_device *dpdk_dev,
- struct ibv_device *ibv_dev,
- struct mlx5_dev_config config,
- const struct mlx5_switch_info *switch_info)
+ struct mlx5_dev_spawn_data *spawn,
+ struct mlx5_dev_config config)
{
+ const struct mlx5_switch_info *switch_info = &spawn->info;
+ struct ibv_device *ibv_dev = spawn->ibv_dev;
struct ibv_context *ctx = NULL;
struct ibv_device_attr_ex attr;
struct ibv_port_attr port_attr;
@@ -949,7 +958,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
return eth_dev;
}
/* Check port status. */
- err = mlx5_glue->query_port(ctx, 1, &port_attr);
+ err = mlx5_glue->query_port(ctx, spawn->ibv_port, &port_attr);
if (err) {
DRV_LOG(ERR, "port query failed: %s", strerror(err));
goto error;
@@ -1309,14 +1318,6 @@ error:
return NULL;
}
-/** Data associated with devices to spawn. */
-struct mlx5_dev_spawn_data {
- unsigned int ifindex; /**< Network interface index. */
- struct mlx5_switch_info info; /**< Switch information. */
- struct ibv_device *ibv_dev; /**< Associated IB device. */
- struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
-};
-
/**
* Comparison callback to sort device data.
*
@@ -1373,7 +1374,24 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
{
struct ibv_device **ibv_list;
- unsigned int n = 0;
+ /*
+ * Number of found IB Devices matching with requested PCI BDF.
+ * nd != 1 means there are multiple IB devices over the same
+ * PCI device and we have representors and master.
+ */
+ unsigned int nd = 0;
+ /*
+ * Number of found IB device Ports. nd = 1 and np = 1..n means
+ * we have the single multiport IB device, and there may be
+ * representors attached to some of found ports.
+ */
+ unsigned int np = 0;
+ /*
+ * Number of DPDK ethernet devices to Spawn - either over
+ * multiple IB devices or multiple ports of single IB device.
+ * Actually this is the number of iterations to spawn.
+ */
+ unsigned int ns = 0;
struct mlx5_dev_config dev_config;
int ret;
@@ -1385,8 +1403,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
return -rte_errno;
}
-
+ /*
+ * First scan the list of all Infiniband devices to find
+ * matching ones, gathering into the list.
+ */
struct ibv_device *ibv_match[ret + 1];
+ int nl_route = -1;
+ int nl_rdma = -1;
+ unsigned int i;
while (ret-- > 0) {
struct rte_pci_addr pci_addr;
@@ -1401,77 +1425,183 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
continue;
DRV_LOG(INFO, "PCI information matches for device \"%s\"",
ibv_list[ret]->name);
- ibv_match[n++] = ibv_list[ret];
+ ibv_match[nd++] = ibv_list[ret];
+ }
+ ibv_match[nd] = NULL;
+ if (!nd) {
+ /* No device macthes, just complain and bail out. */
+ mlx5_glue->free_device_list(ibv_list);
+ DRV_LOG(WARNING,
+ "no Verbs device matches PCI device " PCI_PRI_FMT ","
+ " are kernel drivers loaded?",
+ pci_dev->addr.domain, pci_dev->addr.bus,
+ pci_dev->addr.devid, pci_dev->addr.function);
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ return ret;
+ }
+ nl_route = mlx5_nl_init(NETLINK_ROUTE);
+ nl_rdma = mlx5_nl_init(NETLINK_RDMA);
+ if (nd == 1) {
+ /*
+ * Found single matching device may have multiple ports.
+ * Each port may be representor, we have to check the port
+ * number and check the representors existence.
+ */
+ if (nl_rdma >= 0)
+ np = mlx5_nl_portnum(nl_rdma, ibv_match[0]->name);
+ if (!np)
+ DRV_LOG(WARNING, "can not get IB device \"%s\""
+ " ports number", ibv_match[0]->name);
}
- ibv_match[n] = NULL;
-
- struct mlx5_dev_spawn_data list[n];
- int nl_route = n ? mlx5_nl_init(NETLINK_ROUTE) : -1;
- int nl_rdma = n ? mlx5_nl_init(NETLINK_RDMA) : -1;
- unsigned int i;
- unsigned int u;
-
/*
- * The existence of several matching entries (n > 1) means port
- * representors have been instantiated. No existing Verbs call nor
- * /sys entries can tell them apart, this can only be done through
- * Netlink calls assuming kernel drivers are recent enough to
- * support them.
- *
- * In the event of identification failure through Netlink, try again
- * through sysfs, then either:
- *
- * 1. No device matches (n == 0), complain and bail out.
- * 2. A single IB device matches (n == 1) and is not a representor,
- * assume no switch support.
- * 3. Otherwise no safe assumptions can be made; complain louder and
- * bail out.
+ * Now we can determine the maximal
+ * amount of devices to be spawned.
*/
- for (i = 0; i != n; ++i) {
- list[i].ibv_dev = ibv_match[i];
- list[i].eth_dev = NULL;
- if (nl_rdma < 0)
- list[i].ifindex = 0;
- else
- list[i].ifindex = mlx5_nl_ifindex
- (nl_rdma, list[i].ibv_dev->name, 1);
- if (nl_route < 0 ||
- !list[i].ifindex ||
- mlx5_nl_switch_info(nl_route, list[i].ifindex,
- &list[i].info) ||
- ((!list[i].info.representor && !list[i].info.master) &&
- mlx5_sysfs_switch_info(list[i].ifindex, &list[i].info))) {
- list[i].ifindex = 0;
- memset(&list[i].info, 0, sizeof(list[i].info));
- continue;
+ struct mlx5_dev_spawn_data list[np ? np : nd];
+
+ if (np > 1) {
+ /*
+ * Signle IB device with multiple ports found,
+ * it may be E-Switch master device and representors.
+ * We have to perform identification trough the ports.
+ */
+ assert(nl_rdma >= 0);
+ assert(ns == 0);
+ assert(nd == 1);
+ for (i = 1; i <= np; ++i) {
+ list[ns].max_port = np;
+ list[ns].ibv_port = i;
+ list[ns].ibv_dev = ibv_match[0];
+ list[ns].eth_dev = NULL;
+ list[ns].ifindex = mlx5_nl_ifindex
+ (nl_rdma, list[ns].ibv_dev->name, i);
+ if (!list[ns].ifindex) {
+ /*
+ * No network interface index found for the
+ * specified port, it means there is no
+ * representor on this port. It's OK,
+ * there can be disabled ports, for example
+ * if sriov_numvfs < sriov_totalvfs.
+ */
+ continue;
+ }
+ ret = -1;
+ if (nl_route >= 0)
+ ret = mlx5_nl_switch_info
+ (nl_route,
+ list[ns].ifindex,
+ &list[ns].info);
+ if (ret || (!list[ns].info.representor &&
+ !list[ns].info.master)) {
+ /*
+ * We failed to recognize representors with
+ * Netlink, let's try to perform the task
+ * with sysfs.
+ */
+ ret = mlx5_sysfs_switch_info
+ (list[ns].ifindex,
+ &list[ns].info);
+ }
+ if (!ret && (list[ns].info.representor ^
+ list[ns].info.master))
+ ns++;
}
- }
- if (nl_rdma >= 0)
- close(nl_rdma);
- if (nl_route >= 0)
- close(nl_route);
- /* Count unidentified devices. */
- for (u = 0, i = 0; i != n; ++i)
- if (!list[i].info.master && !list[i].info.representor)
- ++u;
- if (u) {
- if (n == 1 && u == 1) {
- /* Case #2. */
- DRV_LOG(INFO, "no switch support detected");
- } else {
- /* Case #3. */
+ if (!ns) {
DRV_LOG(ERR,
- "unable to tell which of the matching devices"
- " is the master (lack of kernel support?)");
- n = 0;
+ "unable to recognize master/representors"
+ " on the IB device with multiple ports");
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ goto exit;
+ }
+ } else {
+ /*
+ * The existence of several matching entries (nd > 1) means
+ * port representors have been instantiated. No existing Verbs
+ * call nor sysfs entries can tell them apart, this can only
+ * be done through Netlink calls assuming kernel drivers are
+ * recent enough to support them.
+ *
+ * In the event of identification failure through Netlink,
+ * try again through sysfs, then:
+ *
+ * 1. A single IB device matches (nd == 1) with single
+ * port (np=0/1) and is not a representor, assume
+ * no switch support.
+ *
+ * 2. Otherwise no safe assumptions can be made;
+ * complain louder and bail out.
+ */
+ np = 1;
+ for (i = 0; i != nd; ++i) {
+ memset(&list[ns].info, 0, sizeof(list[ns].info));
+ list[ns].max_port = 1;
+ list[ns].ibv_port = 1;
+ list[ns].ibv_dev = ibv_match[i];
+ list[ns].eth_dev = NULL;
+ list[ns].ifindex = 0;
+ if (nl_rdma >= 0)
+ list[ns].ifindex = mlx5_nl_ifindex
+ (nl_rdma, list[ns].ibv_dev->name, 1);
+ if (!list[ns].ifindex) {
+ /*
+ * No network interface index found for the
+ * specified device, it means there it is not
+ * a representor/master.
+ */
+ continue;
+ }
+ ret = -1;
+ if (nl_route >= 0)
+ ret = mlx5_nl_switch_info
+ (nl_route,
+ list[ns].ifindex,
+ &list[ns].info);
+ if (ret || (!list[ns].info.representor &&
+ !list[ns].info.master)) {
+ /*
+ * We failed to recognize representors with
+ * Netlink, let's try to perform the task
+ * with sysfs.
+ */
+ ret = mlx5_sysfs_switch_info
+ (list[ns].ifindex,
+ &list[ns].info);
+ }
+ if (!ret && (list[ns].info.representor ^
+ list[ns].info.master)) {
+ ns++;
+ } else if ((nd == 1) &&
+ !list[ns].info.representor &&
+ !list[ns].info.master) {
+ /*
+ * Single IB device with
+ * one physical port and
+ * attached network device.
+ * May be SRIOV is not enabled
+ * or there is no representors.
+ */
+ DRV_LOG(INFO, "no E-Switch support detected");
+ ns++;
+ break;
+ }
+ }
+ if (!ns) {
+ DRV_LOG(ERR,
+ "unable to recognize master/representors"
+ " on the multiple IB devices");
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ goto exit;
}
}
+ assert(ns);
/*
* Sort list to probe devices in natural order for users convenience
* (i.e. master first, then representors from lowest to highest ID).
*/
- if (n)
- qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp);
+ qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
/* Default configuration. */
dev_config = (struct mlx5_dev_config){
.hw_padding = 0,
@@ -1490,7 +1620,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
.min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
},
};
- /* Device speicific configuration. */
+ /* Device specific configuration. */
switch (pci_dev->id.device_id) {
case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;
@@ -1507,12 +1637,12 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
/* Set architecture-dependent default value if unset. */
if (dev_config.txqs_vec == MLX5_ARG_UNSET)
dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS;
- for (i = 0; i != n; ++i) {
+ for (i = 0; i != ns; ++i) {
uint32_t restore;
list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
- list[i].ibv_dev, dev_config,
- &list[i].info);
+ &list[i],
+ dev_config);
if (!list[i].eth_dev) {
if (rte_errno != EBUSY && rte_errno != EEXIST)
break;
@@ -1525,16 +1655,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
list[i].eth_dev->data->dev_flags |= restore;
rte_eth_dev_probing_finish(list[i].eth_dev);
}
- mlx5_glue->free_device_list(ibv_list);
- if (!n) {
- DRV_LOG(WARNING,
- "no Verbs device matches PCI device " PCI_PRI_FMT ","
- " are kernel drivers loaded?",
- pci_dev->addr.domain, pci_dev->addr.bus,
- pci_dev->addr.devid, pci_dev->addr.function);
- rte_errno = ENOENT;
- ret = -rte_errno;
- } else if (i != n) {
+ if (i != ns) {
DRV_LOG(ERR,
"probe of PCI device " PCI_PRI_FMT " aborted after"
" encountering an error: %s",
@@ -1556,6 +1677,18 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
} else {
ret = 0;
}
+exit:
+ /*
+ * Do the routine cleanup:
+ * - close opened Netlink sockets
+ * - free the Infiniband device list
+ */
+ if (nl_rdma >= 0)
+ close(nl_rdma);
+ if (nl_route >= 0)
+ close(nl_route);
+ assert(ibv_list);
+ mlx5_glue->free_device_list(ibv_list);
return ret;
}