summaryrefslogtreecommitdiff
path: root/lib/librte_acl
diff options
context:
space:
mode:
authorKonstantin Ananyev <konstantin.ananyev@intel.com>2015-01-20 18:41:05 +0000
committerThomas Monjalon <thomas.monjalon@6wind.com>2015-01-28 17:11:26 +0100
commit62945e029e86bba5fb833e969c3c22f84d58c1cb (patch)
tree42830f5a52626b436bac11a2ce59e3ad439af6c9 /lib/librte_acl
parenta0e3310e7a4f92622f7d86369c8f8e3c389edf18 (diff)
downloaddpdk-62945e029e86bba5fb833e969c3c22f84d58c1cb.zip
dpdk-62945e029e86bba5fb833e969c3c22f84d58c1cb.tar.gz
dpdk-62945e029e86bba5fb833e969c3c22f84d58c1cb.tar.xz
acl: introduce config parameter for performance/space trade-off
If at build phase we don't make any trie splitting, then temporary build structures and resulting RT structure might be much bigger than current. >From other side - having just one trie instead of multiple can speedup search quite significantly. >From my measurements on rule-sets with ~10K rules: RT table up to 8 times bigger, classify() up to 80% faster than current implementation. To make it possible for the user to decide about performance/space trade-off - new parameter for build config structure (max_size) is introduced. Setting it to the value greater than zero, instructs rte_acl_build() to: - make sure that size of RT table wouldn't exceed given value. - attempt to minimise number of tries in the table. Setting it to zero maintains current behaviour. That introduces a minor change in the public API, but I think the possible performance gain is too big to ignore it. Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com> Acked-by: Neil Horman <nhorman@tuxdriver.com>
Diffstat (limited to 'lib/librte_acl')
-rw-r--r--lib/librte_acl/acl.h2
-rw-r--r--lib/librte_acl/acl_bld.c134
-rw-r--r--lib/librte_acl/acl_gen.c22
-rw-r--r--lib/librte_acl/rte_acl.c1
-rw-r--r--lib/librte_acl/rte_acl.h2
5 files changed, 106 insertions, 55 deletions
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index d33d7ad..61b849a 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -180,7 +180,7 @@ struct rte_acl_ctx {
int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
- uint32_t num_categories, uint32_t data_index_sz);
+ uint32_t num_categories, uint32_t data_index_sz, size_t max_size);
typedef int (*rte_acl_classify_t)
(const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t);
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index 1fd59ee..1fe79fb 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -41,10 +41,9 @@
/* number of pointers per alloc */
#define ACL_PTR_ALLOC 32
-/* variable for dividing rule sets */
-#define NODE_MAX 2500
-#define NODE_PERCENTAGE (0.40)
-#define RULE_PERCENTAGE (0.40)
+/* macros for dividing rule sets heuristics */
+#define NODE_MAX 0x4000
+#define NODE_MIN 0x800
/* TALLY are statistics per field */
enum {
@@ -97,6 +96,7 @@ struct acl_build_context {
const struct rte_acl_ctx *acx;
struct rte_acl_build_rule *build_rules;
struct rte_acl_config cfg;
+ int32_t node_max;
uint32_t node;
uint32_t num_nodes;
uint32_t category_mask;
@@ -1447,7 +1447,7 @@ build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head,
return NULL;
node_count = context->num_nodes - node_count;
- if (node_count > NODE_MAX) {
+ if (node_count > context->node_max) {
*last = prev;
return trie;
}
@@ -1628,6 +1628,9 @@ rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2)
return 0;
}
+/*
+ * Sort list of rules based on the rules wildness.
+ */
static struct rte_acl_build_rule *
sort_rules(struct rte_acl_build_rule *head)
{
@@ -1636,21 +1639,22 @@ sort_rules(struct rte_acl_build_rule *head)
new_head = NULL;
while (head != NULL) {
+
+ /* remove element from the head of the old list. */
r = head;
head = r->next;
r->next = NULL;
- if (new_head == NULL) {
- new_head = r;
- } else {
- for (p = &new_head;
- (l = *p) != NULL &&
- rule_cmp_wildness(l, r) >= 0;
- p = &l->next)
- ;
-
- r->next = *p;
- *p = r;
- }
+
+ /* walk through new sorted list to find a proper place. */
+ for (p = &new_head;
+ (l = *p) != NULL &&
+ rule_cmp_wildness(l, r) >= 0;
+ p = &l->next)
+ ;
+
+ /* insert element into the new sorted list. */
+ r->next = *p;
+ *p = r;
}
return new_head;
@@ -1789,9 +1793,11 @@ acl_build_log(const struct acl_build_context *ctx)
uint32_t n;
RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+ "node limit for tree split: %u\n"
"nodes created: %u\n"
"memory consumed: %zu\n",
ctx->acx->name,
+ ctx->node_max,
ctx->num_nodes,
ctx->pool.alloc);
@@ -1868,11 +1874,48 @@ acl_set_data_indexes(struct rte_acl_ctx *ctx)
}
}
+/*
+ * Internal routine, performs 'build' phase of trie generation:
+ * - setups build context.
+ * - analizes given set of rules.
+ * - builds internal tree(s).
+ */
+static int
+acl_bld(struct acl_build_context *bcx, struct rte_acl_ctx *ctx,
+ const struct rte_acl_config *cfg, uint32_t node_max)
+{
+ int32_t rc;
+
+ /* setup build context. */
+ memset(bcx, 0, sizeof(*bcx));
+ bcx->acx = ctx;
+ bcx->pool.alignment = ACL_POOL_ALIGN;
+ bcx->pool.min_alloc = ACL_POOL_ALLOC_MIN;
+ bcx->cfg = *cfg;
+ bcx->category_mask = LEN2MASK(bcx->cfg.num_categories);
+ bcx->node_max = node_max;
+
+ /* Create a build rules copy. */
+ rc = acl_build_rules(bcx);
+ if (rc != 0)
+ return rc;
+
+ /* No rules to build for that context+config */
+ if (bcx->build_rules == NULL) {
+ rc = -EINVAL;
+ } else {
+ /* build internal trie representation. */
+ rc = acl_build_tries(bcx, bcx->build_rules);
+ }
+ return rc;
+}
int
rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
{
- int rc;
+ int32_t rc;
+ uint32_t n;
+ size_t max_size;
struct acl_build_context bcx;
if (ctx == NULL || cfg == NULL || cfg->num_categories == 0 ||
@@ -1881,44 +1924,39 @@ rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
acl_build_reset(ctx);
- memset(&bcx, 0, sizeof(bcx));
- bcx.acx = ctx;
- bcx.pool.alignment = ACL_POOL_ALIGN;
- bcx.pool.min_alloc = ACL_POOL_ALLOC_MIN;
- bcx.cfg = *cfg;
- bcx.category_mask = LEN2MASK(bcx.cfg.num_categories);
-
-
- /* Create a build rules copy. */
- rc = acl_build_rules(&bcx);
- if (rc != 0)
- return rc;
+ if (cfg->max_size == 0) {
+ n = NODE_MIN;
+ max_size = SIZE_MAX;
+ } else {
+ n = NODE_MAX;
+ max_size = cfg->max_size;
+ }
- /* No rules to build for that context+config */
- if (bcx.build_rules == NULL) {
- rc = -EINVAL;
+ for (rc = -ERANGE; n >= NODE_MIN && rc == -ERANGE; n /= 2) {
- /* build internal trie representation. */
- } else if ((rc = acl_build_tries(&bcx, bcx.build_rules)) == 0) {
+ /* perform build phase. */
+ rc = acl_bld(&bcx, ctx, cfg, n);
- /* allocate and fill run-time structures. */
- rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries,
+ if (rc == 0) {
+ /* allocate and fill run-time structures. */
+ rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries,
bcx.num_tries, bcx.cfg.num_categories,
RTE_ACL_MAX_FIELDS * RTE_DIM(bcx.tries) *
- sizeof(ctx->data_indexes[0]));
- if (rc == 0) {
+ sizeof(ctx->data_indexes[0]), max_size);
+ if (rc == 0) {
+ /* set data indexes. */
+ acl_set_data_indexes(ctx);
- /* set data indexes. */
- acl_set_data_indexes(ctx);
-
- /* copy in build config. */
- ctx->config = *cfg;
+ /* copy in build config. */
+ ctx->config = *cfg;
+ }
}
- }
- acl_build_log(&bcx);
+ acl_build_log(&bcx);
+
+ /* cleanup after build. */
+ tb_free_pool(&bcx.pool);
+ }
- /* cleanup after build. */
- tb_free_pool(&bcx.pool);
return rc;
}
diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c
index d3def66..ea557ab 100644
--- a/lib/librte_acl/acl_gen.c
+++ b/lib/librte_acl/acl_gen.c
@@ -32,7 +32,6 @@
*/
#include <rte_acl.h>
-#include "acl_vect.h"
#include "acl.h"
#define QRANGE_MIN ((uint8_t)INT8_MIN)
@@ -63,7 +62,8 @@ struct rte_acl_indices {
static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
const struct acl_node_counters *counts,
- const struct rte_acl_indices *indices)
+ const struct rte_acl_indices *indices,
+ size_t max_size)
{
RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
"runtime memory footprint on socket %d:\n"
@@ -71,7 +71,8 @@ acl_gen_log_stats(const struct rte_acl_ctx *ctx,
"quad nodes/vectors/bytes used: %d/%d/%zu\n"
"DFA nodes/group64/bytes used: %d/%d/%zu\n"
"match nodes/bytes used: %d/%zu\n"
- "total: %zu bytes\n",
+ "total: %zu bytes\n"
+ "max limit: %zu bytes\n",
ctx->name, ctx->socket_id,
counts->single, counts->single * sizeof(uint64_t),
counts->quad, counts->quad_vectors,
@@ -80,7 +81,8 @@ acl_gen_log_stats(const struct rte_acl_ctx *ctx,
indices->dfa_index * sizeof(uint64_t),
counts->match,
counts->match * sizeof(struct rte_acl_match_results),
- ctx->mem_sz);
+ ctx->mem_sz,
+ max_size);
}
static uint64_t
@@ -474,7 +476,7 @@ acl_calc_counts_indices(struct acl_node_counters *counts,
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
- uint32_t num_categories, uint32_t data_index_sz)
+ uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
void *mem;
size_t total_size;
@@ -496,6 +498,14 @@ rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
(counts.match + 1) * sizeof(struct rte_acl_match_results) +
XMM_SIZE;
+ if (total_size > max_size) {
+ RTE_LOG(DEBUG, ACL,
+ "Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
+ "bytes required: %zu, allowed: %zu\n",
+ ctx->name, total_size, max_size);
+ return -ERANGE;
+ }
+
mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
ctx->socket_id);
if (mem == NULL) {
@@ -546,6 +556,6 @@ rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
ctx->trans_table = node_array;
memcpy(ctx->trie, trie, sizeof(ctx->trie));
- acl_gen_log_stats(ctx, &counts, &indices);
+ acl_gen_log_stats(ctx, &counts, &indices, max_size);
return 0;
}
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index a9cd349..7d10301 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -543,6 +543,7 @@ rte_acl_ipv4vlan_build(struct rte_acl_ctx *ctx,
if (ctx == NULL || layout == NULL)
return -EINVAL;
+ memset(&cfg, 0, sizeof(cfg));
acl_ipv4vlan_config(&cfg, layout, num_categories);
return rte_acl_build(ctx, &cfg);
}
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index 652a234..30aea03 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -94,6 +94,8 @@ struct rte_acl_config {
uint32_t num_fields; /**< Number of field definitions. */
struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS];
/**< array of field definitions. */
+ size_t max_size;
+ /**< max memory limit for internal run-time structures. */
};
/**