diff options
author | Konstantin Ananyev <konstantin.ananyev@intel.com> | 2015-01-20 18:41:05 +0000 |
---|---|---|
committer | Thomas Monjalon <thomas.monjalon@6wind.com> | 2015-01-28 17:11:26 +0100 |
commit | 62945e029e86bba5fb833e969c3c22f84d58c1cb (patch) | |
tree | 42830f5a52626b436bac11a2ce59e3ad439af6c9 /lib/librte_acl | |
parent | a0e3310e7a4f92622f7d86369c8f8e3c389edf18 (diff) | |
download | dpdk-62945e029e86bba5fb833e969c3c22f84d58c1cb.zip dpdk-62945e029e86bba5fb833e969c3c22f84d58c1cb.tar.gz dpdk-62945e029e86bba5fb833e969c3c22f84d58c1cb.tar.xz |
acl: introduce config parameter for performance/space trade-off
If at build phase we don't make any trie splitting,
then temporary build structures and resulting RT structure might be
much bigger than current.
>From other side - having just one trie instead of multiple can speedup
search quite significantly.
>From my measurements on rule-sets with ~10K rules:
RT table up to 8 times bigger, classify() up to 80% faster
than current implementation.
To make it possible for the user to decide about performance/space trade-off -
new parameter for build config structure (max_size) is introduced.
Setting it to the value greater than zero, instructs rte_acl_build() to:
- make sure that size of RT table wouldn't exceed given value.
- attempt to minimise number of tries in the table.
Setting it to zero maintains current behaviour.
That introduces a minor change in the public API, but I think the possible
performance gain is too big to ignore it.
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Diffstat (limited to 'lib/librte_acl')
-rw-r--r-- | lib/librte_acl/acl.h | 2 | ||||
-rw-r--r-- | lib/librte_acl/acl_bld.c | 134 | ||||
-rw-r--r-- | lib/librte_acl/acl_gen.c | 22 | ||||
-rw-r--r-- | lib/librte_acl/rte_acl.c | 1 | ||||
-rw-r--r-- | lib/librte_acl/rte_acl.h | 2 |
5 files changed, 106 insertions, 55 deletions
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h index d33d7ad..61b849a 100644 --- a/lib/librte_acl/acl.h +++ b/lib/librte_acl/acl.h @@ -180,7 +180,7 @@ struct rte_acl_ctx { int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, - uint32_t num_categories, uint32_t data_index_sz); + uint32_t num_categories, uint32_t data_index_sz, size_t max_size); typedef int (*rte_acl_classify_t) (const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t); diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c index 1fd59ee..1fe79fb 100644 --- a/lib/librte_acl/acl_bld.c +++ b/lib/librte_acl/acl_bld.c @@ -41,10 +41,9 @@ /* number of pointers per alloc */ #define ACL_PTR_ALLOC 32 -/* variable for dividing rule sets */ -#define NODE_MAX 2500 -#define NODE_PERCENTAGE (0.40) -#define RULE_PERCENTAGE (0.40) +/* macros for dividing rule sets heuristics */ +#define NODE_MAX 0x4000 +#define NODE_MIN 0x800 /* TALLY are statistics per field */ enum { @@ -97,6 +96,7 @@ struct acl_build_context { const struct rte_acl_ctx *acx; struct rte_acl_build_rule *build_rules; struct rte_acl_config cfg; + int32_t node_max; uint32_t node; uint32_t num_nodes; uint32_t category_mask; @@ -1447,7 +1447,7 @@ build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head, return NULL; node_count = context->num_nodes - node_count; - if (node_count > NODE_MAX) { + if (node_count > context->node_max) { *last = prev; return trie; } @@ -1628,6 +1628,9 @@ rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2) return 0; } +/* + * Sort list of rules based on the rules wildness. + */ static struct rte_acl_build_rule * sort_rules(struct rte_acl_build_rule *head) { @@ -1636,21 +1639,22 @@ sort_rules(struct rte_acl_build_rule *head) new_head = NULL; while (head != NULL) { + + /* remove element from the head of the old list. */ r = head; head = r->next; r->next = NULL; - if (new_head == NULL) { - new_head = r; - } else { - for (p = &new_head; - (l = *p) != NULL && - rule_cmp_wildness(l, r) >= 0; - p = &l->next) - ; - - r->next = *p; - *p = r; - } + + /* walk through new sorted list to find a proper place. */ + for (p = &new_head; + (l = *p) != NULL && + rule_cmp_wildness(l, r) >= 0; + p = &l->next) + ; + + /* insert element into the new sorted list. */ + r->next = *p; + *p = r; } return new_head; @@ -1789,9 +1793,11 @@ acl_build_log(const struct acl_build_context *ctx) uint32_t n; RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n" + "node limit for tree split: %u\n" "nodes created: %u\n" "memory consumed: %zu\n", ctx->acx->name, + ctx->node_max, ctx->num_nodes, ctx->pool.alloc); @@ -1868,11 +1874,48 @@ acl_set_data_indexes(struct rte_acl_ctx *ctx) } } +/* + * Internal routine, performs 'build' phase of trie generation: + * - setups build context. + * - analizes given set of rules. + * - builds internal tree(s). + */ +static int +acl_bld(struct acl_build_context *bcx, struct rte_acl_ctx *ctx, + const struct rte_acl_config *cfg, uint32_t node_max) +{ + int32_t rc; + + /* setup build context. */ + memset(bcx, 0, sizeof(*bcx)); + bcx->acx = ctx; + bcx->pool.alignment = ACL_POOL_ALIGN; + bcx->pool.min_alloc = ACL_POOL_ALLOC_MIN; + bcx->cfg = *cfg; + bcx->category_mask = LEN2MASK(bcx->cfg.num_categories); + bcx->node_max = node_max; + + /* Create a build rules copy. */ + rc = acl_build_rules(bcx); + if (rc != 0) + return rc; + + /* No rules to build for that context+config */ + if (bcx->build_rules == NULL) { + rc = -EINVAL; + } else { + /* build internal trie representation. */ + rc = acl_build_tries(bcx, bcx->build_rules); + } + return rc; +} int rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg) { - int rc; + int32_t rc; + uint32_t n; + size_t max_size; struct acl_build_context bcx; if (ctx == NULL || cfg == NULL || cfg->num_categories == 0 || @@ -1881,44 +1924,39 @@ rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg) acl_build_reset(ctx); - memset(&bcx, 0, sizeof(bcx)); - bcx.acx = ctx; - bcx.pool.alignment = ACL_POOL_ALIGN; - bcx.pool.min_alloc = ACL_POOL_ALLOC_MIN; - bcx.cfg = *cfg; - bcx.category_mask = LEN2MASK(bcx.cfg.num_categories); - - - /* Create a build rules copy. */ - rc = acl_build_rules(&bcx); - if (rc != 0) - return rc; + if (cfg->max_size == 0) { + n = NODE_MIN; + max_size = SIZE_MAX; + } else { + n = NODE_MAX; + max_size = cfg->max_size; + } - /* No rules to build for that context+config */ - if (bcx.build_rules == NULL) { - rc = -EINVAL; + for (rc = -ERANGE; n >= NODE_MIN && rc == -ERANGE; n /= 2) { - /* build internal trie representation. */ - } else if ((rc = acl_build_tries(&bcx, bcx.build_rules)) == 0) { + /* perform build phase. */ + rc = acl_bld(&bcx, ctx, cfg, n); - /* allocate and fill run-time structures. */ - rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries, + if (rc == 0) { + /* allocate and fill run-time structures. */ + rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries, bcx.num_tries, bcx.cfg.num_categories, RTE_ACL_MAX_FIELDS * RTE_DIM(bcx.tries) * - sizeof(ctx->data_indexes[0])); - if (rc == 0) { + sizeof(ctx->data_indexes[0]), max_size); + if (rc == 0) { + /* set data indexes. */ + acl_set_data_indexes(ctx); - /* set data indexes. */ - acl_set_data_indexes(ctx); - - /* copy in build config. */ - ctx->config = *cfg; + /* copy in build config. */ + ctx->config = *cfg; + } } - } - acl_build_log(&bcx); + acl_build_log(&bcx); + + /* cleanup after build. */ + tb_free_pool(&bcx.pool); + } - /* cleanup after build. */ - tb_free_pool(&bcx.pool); return rc; } diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c index d3def66..ea557ab 100644 --- a/lib/librte_acl/acl_gen.c +++ b/lib/librte_acl/acl_gen.c @@ -32,7 +32,6 @@ */ #include <rte_acl.h> -#include "acl_vect.h" #include "acl.h" #define QRANGE_MIN ((uint8_t)INT8_MIN) @@ -63,7 +62,8 @@ struct rte_acl_indices { static void acl_gen_log_stats(const struct rte_acl_ctx *ctx, const struct acl_node_counters *counts, - const struct rte_acl_indices *indices) + const struct rte_acl_indices *indices, + size_t max_size) { RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n" "runtime memory footprint on socket %d:\n" @@ -71,7 +71,8 @@ acl_gen_log_stats(const struct rte_acl_ctx *ctx, "quad nodes/vectors/bytes used: %d/%d/%zu\n" "DFA nodes/group64/bytes used: %d/%d/%zu\n" "match nodes/bytes used: %d/%zu\n" - "total: %zu bytes\n", + "total: %zu bytes\n" + "max limit: %zu bytes\n", ctx->name, ctx->socket_id, counts->single, counts->single * sizeof(uint64_t), counts->quad, counts->quad_vectors, @@ -80,7 +81,8 @@ acl_gen_log_stats(const struct rte_acl_ctx *ctx, indices->dfa_index * sizeof(uint64_t), counts->match, counts->match * sizeof(struct rte_acl_match_results), - ctx->mem_sz); + ctx->mem_sz, + max_size); } static uint64_t @@ -474,7 +476,7 @@ acl_calc_counts_indices(struct acl_node_counters *counts, int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, - uint32_t num_categories, uint32_t data_index_sz) + uint32_t num_categories, uint32_t data_index_sz, size_t max_size) { void *mem; size_t total_size; @@ -496,6 +498,14 @@ rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, (counts.match + 1) * sizeof(struct rte_acl_match_results) + XMM_SIZE; + if (total_size > max_size) { + RTE_LOG(DEBUG, ACL, + "Gen phase for ACL ctx \"%s\" exceeds max_size limit, " + "bytes required: %zu, allowed: %zu\n", + ctx->name, total_size, max_size); + return -ERANGE; + } + mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE, ctx->socket_id); if (mem == NULL) { @@ -546,6 +556,6 @@ rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, ctx->trans_table = node_array; memcpy(ctx->trie, trie, sizeof(ctx->trie)); - acl_gen_log_stats(ctx, &counts, &indices); + acl_gen_log_stats(ctx, &counts, &indices, max_size); return 0; } diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c index a9cd349..7d10301 100644 --- a/lib/librte_acl/rte_acl.c +++ b/lib/librte_acl/rte_acl.c @@ -543,6 +543,7 @@ rte_acl_ipv4vlan_build(struct rte_acl_ctx *ctx, if (ctx == NULL || layout == NULL) return -EINVAL; + memset(&cfg, 0, sizeof(cfg)); acl_ipv4vlan_config(&cfg, layout, num_categories); return rte_acl_build(ctx, &cfg); } diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h index 652a234..30aea03 100644 --- a/lib/librte_acl/rte_acl.h +++ b/lib/librte_acl/rte_acl.h @@ -94,6 +94,8 @@ struct rte_acl_config { uint32_t num_fields; /**< Number of field definitions. */ struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS]; /**< array of field definitions. */ + size_t max_size; + /**< max memory limit for internal run-time structures. */ }; /** |