summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMoti Haimovsky <motih@mellanox.com>2018-07-10 13:45:54 +0300
committerShahaf Shuler <shahafs@mellanox.com>2018-07-10 14:02:57 +0200
commitba576975a89bdfd186fbcc20a93d7ee16019dbf5 (patch)
tree22b9a4137b006579d054428be2b55bd3bb7ac1d1
parent07581a7e4d090165dac67f02e0b16a5ed4086a4b (diff)
downloaddpdk-ba576975a89bdfd186fbcc20a93d7ee16019dbf5.zip
dpdk-ba576975a89bdfd186fbcc20a93d7ee16019dbf5.tar.gz
dpdk-ba576975a89bdfd186fbcc20a93d7ee16019dbf5.tar.xz
net/mlx4: support hardware TSO
Implement support for hardware TSO. Signed-off-by: Moti Haimovsky <motih@mellanox.com> Acked-by: Matan Azrad <matan@mellanox.com>
-rw-r--r--doc/guides/nics/features/mlx4.ini1
-rw-r--r--doc/guides/nics/mlx4.rst3
-rw-r--r--drivers/net/mlx4/Makefile5
-rw-r--r--drivers/net/mlx4/mlx4.c9
-rw-r--r--drivers/net/mlx4/mlx4.h5
-rw-r--r--drivers/net/mlx4/mlx4_prm.h15
-rw-r--r--drivers/net/mlx4/mlx4_rxtx.c369
-rw-r--r--drivers/net/mlx4/mlx4_rxtx.h2
-rw-r--r--drivers/net/mlx4/mlx4_txq.c8
9 files changed, 413 insertions, 4 deletions
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index f6efd21..98a3f61 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -13,6 +13,7 @@ Queue start/stop = Y
MTU update = Y
Jumbo frame = Y
Scattered Rx = Y
+TSO = Y
Promiscuous mode = Y
Allmulticast mode = Y
Unicast MAC filter = Y
diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 491106a..7cfbda3 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -142,6 +142,9 @@ Limitations
The ability to enable/disable CRC stripping requires OFED version
4.3-1.5.0.0 and above or rdma-core version v18 and above.
+- TSO (TCP Segmentation Offload) is supported in OFED version
+ 4.4 and above.
+
Prerequisites
-------------
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 73f9d40..63bc003 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -85,6 +85,11 @@ mlx4_autoconf.h.new: FORCE
mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q : > '$@'
+ $Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX4_WQE_LSO_SEG \
+ infiniband/mlx4dv.h \
+ type 'struct mlx4_wqe_lso_seg' \
+ $(AUTOCONF_OUTPUT)
# Create mlx4_autoconf.h or update it in case it differs from the new one.
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 06921c9..c67d2c0 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -677,6 +677,15 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
IBV_RAW_PACKET_CAP_SCATTER_FCS);
DEBUG("FCS stripping toggling is %ssupported",
priv->hw_fcs_strip ? "" : "not ");
+ priv->tso =
+ ((device_attr_ex.tso_caps.max_tso > 0) &&
+ (device_attr_ex.tso_caps.supported_qpts &
+ (1 << IBV_QPT_RAW_PACKET)));
+ if (priv->tso)
+ priv->tso_max_payload_sz =
+ device_attr_ex.tso_caps.max_tso;
+ DEBUG("TSO is %ssupported",
+ priv->tso ? "" : "not ");
/* Configure the first MAC address by default. */
err = mlx4_get_mac(priv, &mac.addr_bytes);
if (err) {
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 300cb4d..89d8c38 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -47,6 +47,9 @@
/** Interrupt alarm timeout value in microseconds. */
#define MLX4_INTR_ALARM_TIMEOUT 100000
+/* Maximum packet headers size (L2+L3+L4) for TSO. */
+#define MLX4_MAX_TSO_HEADER 192
+
/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
@@ -90,6 +93,8 @@ struct priv {
uint32_t hw_csum:1; /**< Checksum offload is supported. */
uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported. */
+ uint32_t tso:1; /**< Transmit segmentation offload is supported. */
+ uint32_t tso_max_payload_sz; /**< Max supported TSO payload size. */
uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */
struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b771d8c..aef77ba 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -19,6 +19,7 @@
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
+#include "mlx4_autoconf.h"
/* ConnectX-3 Tx queue basic block. */
#define MLX4_TXBB_SHIFT 6
@@ -40,6 +41,7 @@
/* Work queue element (WQE) flags. */
#define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28)
#define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27)
+#define MLX4_WQE_CTRL_RR (1 << 6)
/* CQE checksum flags. */
enum {
@@ -98,6 +100,19 @@ struct mlx4_cq {
int arm_sn; /**< Rx event counter. */
};
+#ifndef HAVE_IBV_MLX4_WQE_LSO_SEG
+/*
+ * WQE LSO segment structure.
+ * Defined here for backward compatibility with rdma-core v17 and below.
+ * A similar definition is found in infiniband/mlx4dv.h in rdma-core v18
+ * and above.
+ */
+struct mlx4_wqe_lso_seg {
+ rte_be32_t mss_hdr_size; /**< TSO segment size (upper 16 bits), header size (lower 16 bits). */
+ rte_be32_t header[]; /**< Packet headers, copied from the start of the first mbuf. */
+};
+#endif
+
/**
* Retrieve a CQE entry from a CQ.
*
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 78b6dd5..8c88eff 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -38,10 +38,29 @@
* DWORD (32 byte) of a TXBB.
*/
struct pv {
- volatile struct mlx4_wqe_data_seg *dseg;
+ union {
+ volatile struct mlx4_wqe_data_seg *dseg; /**< Data segment whose byte_count write is postponed. */
+ volatile uint32_t *dst; /**< Address of a postponed 32-bit write. */
+ };
uint32_t val;
};
+/** A helper structure for TSO packet handling. */
+struct tso_info {
+ /** Array of postponed writes: the first DWORD (32 bits) of each TXBB. */
+ struct pv *pv;
+ /** Number of entries currently stored in the pv array. */
+ int pv_counter;
+ /** Total size of the WQE, padded to a multiple of the TXBB size. */
+ uint32_t wqe_size;
+ /** Size of the packet headers replicated in front of each TSO segment. */
+ uint16_t tso_header_size;
+ /** Total size of the TSO segment in the WQE, including its padding. */
+ uint16_t wqe_tso_seg_size;
+ /** Raw WQE size in units of 16 bytes, without the TXBB padding. */
+ uint8_t fence_size;
+};
+
/** A table to translate Rx completion flags to packet type. */
uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
/*
@@ -368,6 +387,342 @@ mlx4_fill_tx_data_seg(volatile struct mlx4_wqe_data_seg *dseg,
}
/**
+ * Obtain and calculate TSO information needed for assembling a TSO WQE.
+ *
+ * The raw WQE size is computed in a 32-bit local and validated before it is
+ * narrowed into the 8-bit fence_size field, so that a packet made of too
+ * many segments is rejected instead of passing validation with a wrapped
+ * (smaller) size.
+ *
+ * @param buf
+ *   Pointer to the first packet mbuf.
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param tinfo
+ *   Pointer to a structure to fill the info with. Its fence_size, wqe_size,
+ *   pv and pv_counter fields are only written on success.
+ *
+ * @return
+ *   0 on success, -EINVAL when the TSO parameters of the mbuf are invalid,
+ *   -ENOMEM when the WQE is too large or does not fit in the send queue.
+ */
+static inline int
+mlx4_tx_burst_tso_get_params(struct rte_mbuf *buf,
+			     struct txq *txq,
+			     struct tso_info *tinfo)
+{
+	struct mlx4_sq *sq = &txq->msq;
+	const uint8_t tunneled = txq->priv->hw_csum_l2tun &&
+				 (buf->ol_flags & PKT_TX_TUNNEL_MASK);
+	uint32_t fence_size;
+	uint32_t wqe_size;
+
+	tinfo->tso_header_size = buf->l2_len + buf->l3_len + buf->l4_len;
+	if (tunneled)
+		tinfo->tso_header_size +=
+			buf->outer_l2_len + buf->outer_l3_len;
+	if (unlikely(buf->tso_segsz == 0 ||
+		     tinfo->tso_header_size == 0 ||
+		     tinfo->tso_header_size > MLX4_MAX_TSO_HEADER ||
+		     tinfo->tso_header_size > buf->data_len))
+		return -EINVAL;
+	/*
+	 * Calculate the WQE TSO segment size
+	 * Note:
+	 * 1. An LSO segment must be padded such that the subsequent data
+	 *    segment is 16-byte aligned.
+	 * 2. The start address of the TSO segment is always 16 Bytes aligned.
+	 */
+	tinfo->wqe_tso_seg_size = RTE_ALIGN(sizeof(struct mlx4_wqe_lso_seg) +
+					    tinfo->tso_header_size,
+					    sizeof(struct mlx4_wqe_data_seg));
+	/* One 16-byte unit per data segment on top of ctrl + TSO segment. */
+	fence_size = ((sizeof(struct mlx4_wqe_ctrl_seg) +
+		       tinfo->wqe_tso_seg_size) >> MLX4_SEG_SHIFT) +
+		     buf->nb_segs;
+	wqe_size = RTE_ALIGN((uint32_t)(fence_size << MLX4_SEG_SHIFT),
+			     MLX4_TXBB_SIZE);
+	/*
+	 * Validate WQE size and WQE space in the send queue. The raw size
+	 * must also fit the 8-bit fence_size field it is stored in.
+	 */
+	if (fence_size > UINT8_MAX ||
+	    wqe_size > MLX4_MAX_WQE_SIZE ||
+	    sq->remain_size < wqe_size)
+		return -ENOMEM;
+	tinfo->fence_size = fence_size;
+	tinfo->wqe_size = wqe_size;
+	/* Init pv. */
+	tinfo->pv = (struct pv *)txq->bounce_buf;
+	tinfo->pv_counter = 0;
+	return 0;
+}
+
+/**
+ * Fill the TSO WQE data segments with info on buffers to transmit.
+ *
+ * The first data segment covers the first mbuf past the TSO header; each
+ * following mbuf of the chain gets a data segment of its own. A data segment
+ * that starts a TXBB is not completed here: its byte count is stored in the
+ * pv array of tinfo so that the caller can write it later.
+ *
+ * @param buf
+ * Pointer to the first packet mbuf.
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param tinfo
+ * Pointer to TSO info to use.
+ * @param dseg
+ * Pointer to the first data segment in the TSO WQE.
+ * @param ctrl
+ * Pointer to the control segment in the TSO WQE.
+ *
+ * @return
+ * Pointer to the next WQE control segment on success, NULL otherwise.
+ */
+static inline volatile struct mlx4_wqe_ctrl_seg *
+mlx4_tx_burst_fill_tso_dsegs(struct rte_mbuf *buf,
+ struct txq *txq,
+ struct tso_info *tinfo,
+ volatile struct mlx4_wqe_data_seg *dseg,
+ volatile struct mlx4_wqe_ctrl_seg *ctrl)
+{
+ uint32_t lkey;
+ int nb_segs = buf->nb_segs;
+ int nb_segs_txbb;
+ struct mlx4_sq *sq = &txq->msq;
+ struct rte_mbuf *sbuf = buf;
+ struct pv *pv = tinfo->pv;
+ int *pv_counter = &tinfo->pv_counter;
+ volatile struct mlx4_wqe_ctrl_seg *ctrl_next =
+ (volatile struct mlx4_wqe_ctrl_seg *)
+ ((volatile uint8_t *)ctrl + tinfo->wqe_size);
+ uint16_t data_len = sbuf->data_len - tinfo->tso_header_size;
+ uintptr_t data_addr = rte_pktmbuf_mtod_offset(sbuf, uintptr_t,
+ tinfo->tso_header_size);
+
+ do {
+ /* How many dseg entries are left in the current TXBB? */
+ nb_segs_txbb = (MLX4_TXBB_SIZE -
+ ((uintptr_t)dseg & (MLX4_TXBB_SIZE - 1))) >>
+ MLX4_SEG_SHIFT;
+ switch (nb_segs_txbb) {
+#ifndef NDEBUG
+ default:
+ /* Should never happen. */
+ rte_panic("%p: Invalid number of SGEs(%d) for a TXBB",
+ (void *)txq, nb_segs_txbb);
+ /* rte_panic never returns. */
+ break;
+#endif /* NDEBUG */
+ case 4:
+ /* Memory region key for this memory pool. */
+ lkey = mlx4_tx_mb2mr(txq, sbuf);
+ if (unlikely(lkey == (uint32_t)-1))
+ goto err;
+ dseg->addr = rte_cpu_to_be_64(data_addr);
+ dseg->lkey = lkey;
+ /*
+ * This data segment starts at the beginning of a new
+ * TXBB, so we need to postpone its byte_count writing
+ * for later.
+ */
+ pv[*pv_counter].dseg = dseg;
+ /*
+ * Zero length segment is treated as inline segment
+ * with zero data.
+ */
+ pv[(*pv_counter)++].val =
+ rte_cpu_to_be_32(data_len ?
+ data_len :
+ 0x80000000);
+ if (--nb_segs == 0)
+ return ctrl_next;
+ /* Prepare next buf info */
+ sbuf = sbuf->next;
+ dseg++;
+ data_len = sbuf->data_len;
+ data_addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ /* fallthrough */
+ case 3:
+ lkey = mlx4_tx_mb2mr(txq, sbuf);
+ if (unlikely(lkey == (uint32_t)-1))
+ goto err;
+ mlx4_fill_tx_data_seg(dseg, lkey, data_addr,
+ rte_cpu_to_be_32(data_len ?
+ data_len :
+ 0x80000000));
+ if (--nb_segs == 0)
+ return ctrl_next;
+ /* Prepare next buf info */
+ sbuf = sbuf->next;
+ dseg++;
+ data_len = sbuf->data_len;
+ data_addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ /* fallthrough */
+ case 2:
+ lkey = mlx4_tx_mb2mr(txq, sbuf);
+ if (unlikely(lkey == (uint32_t)-1))
+ goto err;
+ mlx4_fill_tx_data_seg(dseg, lkey, data_addr,
+ rte_cpu_to_be_32(data_len ?
+ data_len :
+ 0x80000000));
+ if (--nb_segs == 0)
+ return ctrl_next;
+ /* Prepare next buf info */
+ sbuf = sbuf->next;
+ dseg++;
+ data_len = sbuf->data_len;
+ data_addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ /* fallthrough */
+ case 1:
+ lkey = mlx4_tx_mb2mr(txq, sbuf);
+ if (unlikely(lkey == (uint32_t)-1))
+ goto err;
+ mlx4_fill_tx_data_seg(dseg, lkey, data_addr,
+ rte_cpu_to_be_32(data_len ?
+ data_len :
+ 0x80000000));
+ if (--nb_segs == 0)
+ return ctrl_next;
+ /* Prepare next buf info */
+ sbuf = sbuf->next;
+ dseg++;
+ data_len = sbuf->data_len;
+ data_addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ /* fallthrough */
+ }
+ /* Wrap dseg if it points at the end of the queue. */
+ if ((volatile uint8_t *)dseg >= sq->eob)
+ dseg = (volatile struct mlx4_wqe_data_seg *)
+ ((volatile uint8_t *)dseg - sq->size);
+ } while (true);
+err:
+ return NULL;
+}
+
+/**
+ * Fill the packet's l2, l3 and l4 headers to the WQE.
+ *
+ * This will be used as the header for each TSO segment that is transmitted.
+ * The first 32 bits of every TXBB reached while copying are not written to
+ * the WQE; they are saved in the pv array of tinfo so that the caller can
+ * write them later.
+ *
+ * @param buf
+ *   Pointer to the first packet mbuf.
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param tinfo
+ *   Pointer to TSO info to use.
+ * @param ctrl
+ *   Pointer to the control segment in the TSO WQE.
+ *
+ * @return
+ *   Pointer to the location of the first data segment, right after the
+ *   padded TSO segment. The pointer is not wrapped here: it may point at or
+ *   past the end of the send queue buffer.
+ */
+static inline volatile struct mlx4_wqe_data_seg *
+mlx4_tx_burst_fill_tso_hdr(struct rte_mbuf *buf,
+			   struct txq *txq,
+			   struct tso_info *tinfo,
+			   volatile struct mlx4_wqe_ctrl_seg *ctrl)
+{
+	volatile struct mlx4_wqe_lso_seg *tseg =
+		(volatile struct mlx4_wqe_lso_seg *)(ctrl + 1);
+	struct mlx4_sq *sq = &txq->msq;
+	struct pv *pv = tinfo->pv;
+	int *pv_counter = &tinfo->pv_counter;
+	int remain_size = tinfo->tso_header_size;
+	char *from = rte_pktmbuf_mtod(buf, char *);
+	uint16_t txbb_avail_space;
+	/* Union to overcome volatile constraints when copying TSO header. */
+	union {
+		volatile uint8_t *vto;
+		uint8_t *to;
+	} thdr = { .vto = (volatile uint8_t *)tseg->header, };
+
+	/*
+	 * TSO data always starts at offset 20 from the beginning of the TXBB
+	 * (16 byte ctrl + 4byte TSO desc). Since each TXBB is 64Byte aligned
+	 * we can write the first 44 TSO header bytes without worry for TxQ
+	 * wrapping or overwriting the first TXBB 32bit word.
+	 */
+	txbb_avail_space = MLX4_TXBB_SIZE -
+			   (sizeof(struct mlx4_wqe_ctrl_seg) +
+			    sizeof(struct mlx4_wqe_lso_seg));
+	while (remain_size >= (int)(txbb_avail_space + sizeof(uint32_t))) {
+		/* Copy to end of txbb. */
+		rte_memcpy(thdr.to, from, txbb_avail_space);
+		from += txbb_avail_space;
+		thdr.to += txbb_avail_space;
+		/* New TXBB, Check for TxQ wrap. */
+		if (thdr.to >= sq->eob)
+			thdr.vto = sq->buf;
+		/*
+		 * New TXBB, stash the first 32bits for later use. The source
+		 * is a byte pointer into packet data with no alignment
+		 * guarantee, so the word is copied rather than loaded through
+		 * a cast pointer.
+		 */
+		pv[*pv_counter].dst = (volatile uint32_t *)thdr.to;
+		rte_memcpy(&pv[*pv_counter].val, from, sizeof(uint32_t));
+		(*pv_counter)++;
+		from += sizeof(uint32_t);
+		thdr.to += sizeof(uint32_t);
+		remain_size -= txbb_avail_space + sizeof(uint32_t);
+		/* Avail space in new TXBB is TXBB size - 4 */
+		txbb_avail_space = MLX4_TXBB_SIZE - sizeof(uint32_t);
+	}
+	if (remain_size > txbb_avail_space) {
+		/*
+		 * The header ends inside the first 32 bits of the next TXBB:
+		 * fill the current TXBB and stash the remaining bytes.
+		 */
+		rte_memcpy(thdr.to, from, txbb_avail_space);
+		from += txbb_avail_space;
+		thdr.to += txbb_avail_space;
+		remain_size -= txbb_avail_space;
+		/* New TXBB, Check for TxQ wrap. */
+		if (thdr.to >= sq->eob)
+			thdr.vto = sq->buf;
+		pv[*pv_counter].dst = (volatile uint32_t *)thdr.to;
+		rte_memcpy(&pv[*pv_counter].val, from, remain_size);
+		(*pv_counter)++;
+	} else if (remain_size) {
+		rte_memcpy(thdr.to, from, remain_size);
+	}
+	tseg->mss_hdr_size = rte_cpu_to_be_32((buf->tso_segsz << 16) |
+					      tinfo->tso_header_size);
+	/* Calculate data segment location */
+	return (volatile struct mlx4_wqe_data_seg *)
+				((uintptr_t)tseg + tinfo->wqe_tso_seg_size);
+}
+
+/**
+ * Build the TSO segment and the data segments of a TSO WQE.
+ *
+ * Works for both single-segment and multi-segment packets. A packet that
+ * cannot be posted is accounted in the queue's odropped counter.
+ *
+ * @param buf
+ *   Pointer to the first packet mbuf.
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param ctrl
+ *   Pointer to the WQE control segment.
+ *
+ * @return
+ *   Pointer to the next WQE control segment on success, NULL otherwise.
+ */
+static volatile struct mlx4_wqe_ctrl_seg *
+mlx4_tx_burst_tso(struct rte_mbuf *buf, struct txq *txq,
+		  volatile struct mlx4_wqe_ctrl_seg *ctrl)
+{
+	struct mlx4_sq *sq = &txq->msq;
+	volatile struct mlx4_wqe_ctrl_seg *next_ctrl;
+	volatile struct mlx4_wqe_data_seg *first_dseg;
+	struct tso_info tinfo;
+	int idx;
+
+	if (unlikely(mlx4_tx_burst_tso_get_params(buf, txq, &tinfo)))
+		goto drop;
+	first_dseg = mlx4_tx_burst_fill_tso_hdr(buf, txq, &tinfo, ctrl);
+	if (unlikely(first_dseg == NULL))
+		goto drop;
+	/* The data segments may start past the end of the queue buffer. */
+	if ((uintptr_t)first_dseg >= (uintptr_t)sq->eob)
+		first_dseg = (volatile struct mlx4_wqe_data_seg *)
+				((uintptr_t)first_dseg - sq->size);
+	next_ctrl = mlx4_tx_burst_fill_tso_dsegs(buf, txq, &tinfo,
+						 first_dseg, ctrl);
+	if (unlikely(next_ctrl == NULL))
+		goto drop;
+	/* Write the first DWORD of each TXBB saved earlier. */
+	if (likely(tinfo.pv_counter)) {
+		/* Need a barrier here before writing the first TXBB word. */
+		rte_io_wmb();
+		idx = tinfo.pv_counter;
+		do {
+			--idx;
+			*tinfo.pv[idx].dst = tinfo.pv[idx].val;
+		} while (idx > 0);
+	}
+	ctrl->fence_size = tinfo.fence_size;
+	sq->remain_size -= tinfo.wqe_size;
+	return next_ctrl;
+drop:
+	txq->stats.odropped++;
+	return NULL;
+}
+
+/**
* Write data segments of multi-segment packet.
*
* @param buf
@@ -560,6 +915,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint16_t flags16[2];
} srcrb;
uint32_t lkey;
+ bool tso = txq->priv->tso && (buf->ol_flags & PKT_TX_TCP_SEG);
/* Clean up old buffer. */
if (likely(elt->buf != NULL)) {
@@ -578,7 +934,16 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
} while (tmp != NULL);
}
RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- if (buf->nb_segs == 1) {
+ if (tso) {
+ /* Change opcode to TSO */
+ owner_opcode &= ~MLX4_OPCODE_CONFIG_CMD;
+ owner_opcode |= MLX4_OPCODE_LSO | MLX4_WQE_CTRL_RR;
+ ctrl_next = mlx4_tx_burst_tso(buf, txq, ctrl);
+ if (!ctrl_next) {
+ elt->buf = NULL;
+ break;
+ }
+ } else if (buf->nb_segs == 1) {
/* Validate WQE space in the send queue. */
if (sq->remain_size < MLX4_TXBB_SIZE) {
elt->buf = NULL;
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 4c025e3..ffa8abf 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -90,7 +90,7 @@ struct mlx4_txq_stats {
unsigned int idx; /**< Mapping index. */
uint64_t opackets; /**< Total of successfully sent packets. */
uint64_t obytes; /**< Total of successfully sent bytes. */
- uint64_t odropped; /**< Total of packets not sent when Tx ring full. */
+ uint64_t odropped; /**< Total number of packets failed to transmit. */
};
/** Tx queue descriptor. */
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 6edaadb..9aa7440 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -116,8 +116,14 @@ mlx4_get_tx_port_offloads(struct priv *priv)
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM);
}
- if (priv->hw_csum_l2tun)
+ if (priv->tso)
+ offloads |= DEV_TX_OFFLOAD_TCP_TSO;
+ if (priv->hw_csum_l2tun) {
offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
+ if (priv->tso)
+ offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+ DEV_TX_OFFLOAD_GRE_TNL_TSO);
+ }
return offloads;
}