summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGavin Hu <gavin.hu@arm.com>2018-11-09 19:42:47 +0800
committerThomas Monjalon <thomas@monjalon.net>2018-11-13 17:00:58 +0100
commit49594a63147a994d9674b1f479d0107e70fe1cbc (patch)
tree50b3d5464fc81e2bbb9a84a7203a4a01350c93cc
parent86757c2c3ed5006940f93725d39131dfb0d09b60 (diff)
downloaddpdk-49594a63147a994d9674b1f479d0107e70fe1cbc.zip
dpdk-49594a63147a994d9674b1f479d0107e70fe1cbc.tar.gz
dpdk-49594a63147a994d9674b1f479d0107e70fe1cbc.tar.xz
ring/c11: relax ordering for load and store of the head
When calling __atomic_compare_exchange_n, use relaxed ordering for the success case, as multiple producers/consumers do not release updates to each other so no need for acquire or release ordering. Because the thread fence in place, ordering for the first iteration can be relaxed. Run the ring perf test on the following testbed: HW: ThunderX2 B0 CPU CN9975 v2.0, 2 sockets, 28core,4 threads/core,2.5GHz OS: Ubuntu 16.04.5 LTS, Kernel: 4.15.0-36-generic DPDK: 18.08, Configuration: arm64-armv8a-linuxapp-gcc gcc: 8.1.0 $sudo ./test/test/test -l 16-19,44-47,72-75,100-103 -n 4 \ --socket-mem=1024 -- -i Without the patch: *** Testing using two physical cores *** SP/SC bulk enq/dequeue (size: 8): 5.75 MP/MC bulk enq/dequeue (size: 8): 10.18 SP/SC bulk enq/dequeue (size: 32): 1.80 MP/MC bulk enq/dequeue (size: 32): 2.34 With the patch: *** Testing using two physical cores *** SP/SC bulk enq/dequeue (size: 8): 5.59 MP/MC bulk enq/dequeue (size: 8): 10.54 SP/SC bulk enq/dequeue (size: 32): 1.73 MP/MC bulk enq/dequeue (size: 32): 2.38 No significant improvement, nor regression was seen, as the optimisation is not at the critical path. Fixes: 39368ebfc6 ("ring: introduce C11 memory model barrier option") Cc: stable@dpdk.org Signed-off-by: Gavin Hu <gavin.hu@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> Reviewed-by: Steve Capper <steve.capper@arm.com> Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
-rw-r--r--lib/librte_ring/rte_ring_c11_mem.h8
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h
index dc49a99..0fb73a3 100644
--- a/lib/librte_ring/rte_ring_c11_mem.h
+++ b/lib/librte_ring/rte_ring_c11_mem.h
@@ -61,7 +61,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
unsigned int max = n;
int success;
- *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
+ *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);
do {
/* Reset n to the initial burst count */
n = max;
@@ -97,7 +97,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
/* on failure, *old_head is updated */
success = __atomic_compare_exchange_n(&r->prod.head,
old_head, *new_head,
- 0, __ATOMIC_ACQUIRE,
+ 0, __ATOMIC_RELAXED,
__ATOMIC_RELAXED);
} while (unlikely(success == 0));
return n;
@@ -137,7 +137,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
int success;
/* move cons.head atomically */
- *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE);
+ *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);
do {
/* Restore n as it may change every loop */
n = max;
@@ -172,7 +172,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
/* on failure, *old_head will be updated */
success = __atomic_compare_exchange_n(&r->cons.head,
old_head, *new_head,
- 0, __ATOMIC_ACQUIRE,
+ 0, __ATOMIC_RELAXED,
__ATOMIC_RELAXED);
} while (unlikely(success == 0));
return n;