summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ruifeng Wang <ruifeng.wang@arm.com> 2019-08-28 16:24:53 +0800
committer Ferruh Yigit <ferruh.yigit@intel.com> 2019-09-03 17:12:38 +0200
commit 18b7d4eb3dca9e24208c8be59a8972e7f9d7d1cf (patch)
tree 68ff732708cbdaad42ff3ddab3800c2fd69e8f1d
parent f1f0f39806d97a9a4d74d47ce7fb04e9b4943e08 (diff)
downloaddpdk-next-eventdev-18b7d4eb3dca9e24208c8be59a8972e7f9d7d1cf.zip
dpdk-next-eventdev-18b7d4eb3dca9e24208c8be59a8972e7f9d7d1cf.tar.gz
dpdk-next-eventdev-18b7d4eb3dca9e24208c8be59a8972e7f9d7d1cf.tar.xz
net/ixgbe: remove memory barrier from NEON Rx
The memory barrier was intended for descriptor data integrity (see comments in [1]). Since NEON loads were implemented later, and a whole entry is now loaded in one go and atomically, ordering of partial loads is unnecessary. Remove the barrier accordingly.

Also corrected a couple of code comments.

In terms of performance, slightly higher average throughput was observed in tests with an 82599ES NIC.

[1] http://patches.dpdk.org/patch/18153/

Fixes: 989a84050542 ("net/ixgbe: fix received packets number for ARM NEON")
Cc: stable@dpdk.org

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
-rw-r--r-- drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c 5
1 files changed, 2 insertions, 3 deletions
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index edb1383..86fb3af 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -214,13 +214,13 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
uint32_t var = 0;
uint32_t stat;
- /* B.1 load 1 mbuf point */
+ /* B.1 load 2 mbuf point */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
/* B.2 copy 2 mbuf point into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
- /* B.1 load 1 mbuf point */
+ /* B.1 load 2 mbuf point */
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
/* A. load 4 pkts descs */
@@ -228,7 +228,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
- rte_smp_rmb();
/* B.2 copy 2 mbuf point into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);