summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Medvedkin <vladimir.medvedkin@intel.com>2019-11-01 15:21:34 +0000
committerThomas Monjalon <thomas@monjalon.net>2019-11-06 00:08:56 +0100
commit5a5793a5ffa2231ae759de824fb8bfc970e5d4f7 (patch)
treecde2826d9916b9046066cf4ea2d9e3a6842f60de
parent38383890b21f995c3dafa177298d744e1db0be84 (diff)
downloaddpdk-5a5793a5ffa2231ae759de824fb8bfc970e5d4f7.zip
dpdk-5a5793a5ffa2231ae759de824fb8bfc970e5d4f7.tar.gz
dpdk-5a5793a5ffa2231ae759de824fb8bfc970e5d4f7.tar.xz
rib: add RIB library
Add RIB (Routing Information Base) library. This library implements an IPv4 routing table optimized for control plane operations. It implements a control plane struct containing routes in a tree and provides fast add/del operations for routes. Also it allows to perform fast subtree traversals (i.e. retrieve existing subroutes for a given prefix). This structure will be used as a control plane helper structure for FIB implementation. Also it might be used standalone in other different places such as bitmaps for example. Internal implementation is level compressed binary trie. Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
-rw-r--r--MAINTAINERS4
-rw-r--r--config/common_base5
-rw-r--r--doc/api/doxy-api.conf.in1
-rw-r--r--doc/guides/rel_notes/release_19_11.rst3
-rw-r--r--lib/Makefile2
-rw-r--r--lib/librte_rib/Makefile25
-rw-r--r--lib/librte_rib/meson.build8
-rw-r--r--lib/librte_rib/rte_rib.c532
-rw-r--r--lib/librte_rib/rte_rib.h277
-rw-r--r--lib/librte_rib/rte_rib_version.map20
-rw-r--r--lib/meson.build2
-rw-r--r--mk/rte.app.mk1
12 files changed, 879 insertions, 1 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 717c318..43d07ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1317,6 +1317,10 @@ F: lib/librte_member/
F: doc/guides/prog_guide/member_lib.rst
F: app/test/test_member*
+RIB - EXPERIMENTAL
+M: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
+F: lib/librte_rib/
+
Traffic metering
M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
F: lib/librte_meter/
diff --git a/config/common_base b/config/common_base
index b2be3d9..09029b7 100644
--- a/config/common_base
+++ b/config/common_base
@@ -909,6 +909,11 @@ CONFIG_RTE_LIBRTE_RCU=y
CONFIG_RTE_LIBRTE_RCU_DEBUG=n
#
+# Compile librte_rib
+#
+CONFIG_RTE_LIBRTE_RIB=y
+
+#
# Compile librte_lpm
#
CONFIG_RTE_LIBRTE_LPM=y
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index d8dafb2..b76a3d3 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -55,6 +55,7 @@ INPUT = @TOPDIR@/doc/api/doxy-api-index.md \
@TOPDIR@/lib/librte_rawdev \
@TOPDIR@/lib/librte_rcu \
@TOPDIR@/lib/librte_reorder \
+ @TOPDIR@/lib/librte_rib \
@TOPDIR@/lib/librte_ring \
@TOPDIR@/lib/librte_sched \
@TOPDIR@/lib/librte_security \
diff --git a/doc/guides/rel_notes/release_19_11.rst b/doc/guides/rel_notes/release_19_11.rst
index ae8e7b2..bf1333d 100644
--- a/doc/guides/rel_notes/release_19_11.rst
+++ b/doc/guides/rel_notes/release_19_11.rst
@@ -226,6 +226,8 @@ New Features
Added eBPF JIT support for arm64 architecture to improve the eBPF program
performance.
+* **Added RIB (Routing Information Base) library.**
+
* **Updated testpmd.**
* Added a console command to testpmd app, ``show port (port_id) ptypes`` which
@@ -433,6 +435,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_port.so.3
librte_power.so.1
librte_rawdev.so.1
+ + librte_rib.so.1
librte_rcu.so.1
librte_reorder.so.1
librte_ring.so.2
diff --git a/lib/Makefile b/lib/Makefile
index 41c463d..aa5ee1e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -51,6 +51,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
DEPDIRS-librte_hash := librte_eal librte_ring
DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
+DEPDIRS-librte_rib := librte_eal librte_mempool
DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
DEPDIRS-librte_lpm := librte_eal librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
diff --git a/lib/librte_rib/Makefile b/lib/librte_rib/Makefile
new file mode 100644
index 0000000..79f259a
--- /dev/null
+++ b/lib/librte_rib/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+# Copyright(c) 2019 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_rib.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_eal -lrte_mempool
+
+EXPORT_MAP := rte_rib_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_RIB) := rte_rib.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_RIB)-include := rte_rib.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_rib/meson.build b/lib/librte_rib/meson.build
new file mode 100644
index 0000000..e7b8920
--- /dev/null
+++ b/lib/librte_rib/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+# Copyright(c) 2019 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('rte_rib.c')
+headers = files('rte_rib.h')
+deps += ['mempool']
diff --git a/lib/librte_rib/rte_rib.c b/lib/librte_rib/rte_rib.c
new file mode 100644
index 0000000..55d612d
--- /dev/null
+++ b/lib/librte_rib/rte_rib.c
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_mempool.h>
+#include <rte_rwlock.h>
+#include <rte_string_fns.h>
+#include <rte_tailq.h>
+
+#include <rte_rib.h>
+
+/*
+ * Tailq used to track RIB objects: rte_rib_create() links new objects into
+ * it and rte_rib_find_existing() searches it by name.
+ */
+TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_rib_tailq = {
+ .name = "RTE_RIB",
+};
+EAL_REGISTER_TAILQ(rte_rib_tailq)
+
+/* Bit in rte_rib_node.flag marking a node that holds an inserted route. */
+#define RTE_RIB_VALID_NODE 1
+/* Maximum depth value possible for IPv4 RIB. */
+#define RIB_MAXDEPTH 32
+/* Size of the RIB name buffer, including the terminating NUL. */
+#define RTE_RIB_NAMESIZE 64
+
+/*
+ * One node of the binary trie.
+ * A node either holds a route (RTE_RIB_VALID_NODE set in flag) or is an
+ * intermediate node that only joins two branches.
+ */
+struct rte_rib_node {
+	struct rte_rib_node *left;	/* child followed when the next ip bit is 0 */
+	struct rte_rib_node *right;	/* child followed when the next ip bit is 1 */
+	struct rte_rib_node *parent;	/* NULL for the root of the tree */
+	uint32_t ip;			/* prefix address, masked to depth bits */
+	uint8_t depth;			/* prefix length, 0..RIB_MAXDEPTH */
+	uint8_t flag;			/* RTE_RIB_VALID_NODE or 0 */
+	uint64_t nh;			/* next hop value set via rte_rib_set_nh() */
+	/*
+	 * User extension area; its size comes from rte_rib_conf.ext_sz.
+	 * Declared as a C99 flexible array member instead of a zero-length
+	 * array, sizeof(struct rte_rib_node) is unchanged.
+	 */
+	__extension__ uint64_t ext[];
+};
+
+/* RIB object: the trie root, the pool its nodes come from and counters. */
+struct rte_rib {
+ /* object name, copied from the name given to rte_rib_create() */
+ char name[RTE_RIB_NAMESIZE];
+ /* root of the trie, NULL while the RIB is empty */
+ struct rte_rib_node *tree;
+ /* mempool every rte_rib_node is taken from and returned to */
+ struct rte_mempool *node_pool;
+ /* nodes currently taken from node_pool (routes and intermediate nodes) */
+ uint32_t cur_nodes;
+ /* routes currently stored */
+ uint32_t cur_routes;
+ /* max_nodes value taken from the creation config */
+ uint32_t max_nodes;
+};
+
+/* Tell whether the node holds a route, i.e. has RTE_RIB_VALID_NODE set. */
+static inline bool
+is_valid_node(struct rte_rib_node *node)
+{
+	uint8_t valid_bit = node->flag & RTE_RIB_VALID_NODE;
+
+	return valid_bit == RTE_RIB_VALID_NODE;
+}
+
+/* Tell whether the node is the right child of its parent (parent != NULL). */
+static inline bool
+is_right_node(struct rte_rib_node *node)
+{
+	struct rte_rib_node *up = node->parent;
+
+	return up->right == node;
+}
+
+/*
+ * Tell whether ip1 falls inside the ip2/depth prefix, i.e. whether both
+ * addresses agree on their first depth bits.
+ */
+static inline bool
+is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth)
+{
+	uint32_t differing_bits = ip1 ^ ip2;
+
+	return (differing_bits & rte_rib_depth_to_mask(depth)) == 0;
+}
+
+/*
+ * Pick the child of node to follow for ip: the right child when the ip bit
+ * just below the node's prefix is set, the left child otherwise.
+ * A node at RIB_MAXDEPTH has no bit left to test, so NULL is returned
+ * instead of evaluating a shift by a negative count (undefined behaviour).
+ * The shifted constant is unsigned so that testing bit 31 is well defined.
+ */
+static inline struct rte_rib_node *
+get_nxt_node(struct rte_rib_node *node, uint32_t ip)
+{
+	if (node->depth >= RIB_MAXDEPTH)
+		return NULL;
+	return (ip & (1u << (31 - node->depth))) ? node->right : node->left;
+}
+
+/*
+ * Take one node from the RIB's mempool and account for it in cur_nodes.
+ * Returns NULL when the pool cannot provide an object.
+ */
+static struct rte_rib_node *
+node_alloc(struct rte_rib *rib)
+{
+	struct rte_rib_node *node;
+
+	if (unlikely(rte_mempool_get(rib->node_pool, (void *)&node) != 0))
+		return NULL;
+
+	rib->cur_nodes++;
+	return node;
+}
+
+/* Give a node back to the RIB's mempool and drop it from cur_nodes. */
+static void
+node_free(struct rte_rib *rib, struct rte_rib_node *ent)
+{
+	rib->cur_nodes--;
+	rte_mempool_put(rib->node_pool, ent);
+}
+
+/*
+ * Longest prefix match: walk down the trie along the bits of ip for as long
+ * as the visited node covers ip, remembering the deepest valid node seen.
+ * Returns NULL when no route covers ip, or when rib is NULL (rte_errno is
+ * then set to EINVAL).
+ */
+struct rte_rib_node *
+rte_rib_lookup(struct rte_rib *rib, uint32_t ip)
+{
+	struct rte_rib_node *best = NULL;
+	struct rte_rib_node *walk;
+
+	if (rib == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	for (walk = rib->tree;
+			walk != NULL && is_covered(ip, walk->ip, walk->depth);
+			walk = get_nxt_node(walk, ip)) {
+		if (is_valid_node(walk))
+			best = walk;
+	}
+	return best;
+}
+
+/*
+ * Find the closest ancestor of ent that holds a route, skipping
+ * intermediate nodes. Returns NULL when ent is NULL or has no such ancestor.
+ */
+struct rte_rib_node *
+rte_rib_lookup_parent(struct rte_rib_node *ent)
+{
+	struct rte_rib_node *up;
+
+	if (ent == NULL)
+		return NULL;
+
+	for (up = ent->parent; up != NULL; up = up->parent) {
+		if (is_valid_node(up))
+			break;
+	}
+	return up;
+}
+
+/*
+ * Find the valid node that matches ip/depth exactly.
+ * ip is expected to be already masked to depth bits. The walk stops as soon
+ * as a node is deeper than depth or no longer covers ip.
+ */
+static struct rte_rib_node *
+__rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
+{
+	struct rte_rib_node *walk = rib->tree;
+
+	while (walk != NULL) {
+		if (walk->ip == ip && walk->depth == depth &&
+				is_valid_node(walk))
+			return walk;
+		if (walk->depth > depth ||
+				!is_covered(ip, walk->ip, walk->depth))
+			return NULL;
+		walk = get_nxt_node(walk, ip);
+	}
+	return NULL;
+}
+
+/*
+ * Public exact-match lookup: validate the arguments, mask ip to depth bits
+ * and search for that prefix. Sets rte_errno to EINVAL and returns NULL
+ * when rib is NULL or depth exceeds RIB_MAXDEPTH.
+ */
+struct rte_rib_node *
+rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth)
+{
+	uint32_t net;
+
+	if (rib == NULL || depth > RIB_MAXDEPTH) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	net = ip & rte_rib_depth_to_mask(depth);
+	return __rib_lookup_exact(rib, net, depth);
+}
+
+/*
+ * Iterate over the routes that are more specific than the ip/depth prefix.
+ *
+ * Pass last = NULL on the first invocation and the previously returned node
+ * on the following ones. Only valid nodes that are covered by ip/depth and
+ * whose depth is strictly greater than depth are reported.
+ * With RTE_RIB_GET_NXT_COVER the first matching node met while descending
+ * is returned; otherwise the descent continues to the end of the branch and
+ * the last matching node met is returned.
+ * Returns NULL when nothing is left, or when rib is NULL or depth exceeds
+ * RIB_MAXDEPTH (rte_errno is set to EINVAL in that case).
+ */
+struct rte_rib_node *
+rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip,
+ uint8_t depth, struct rte_rib_node *last, int flag)
+{
+ struct rte_rib_node *tmp, *prev = NULL;
+
+ if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (last == NULL) {
+ /* first call: descend along ip until a node is at least depth deep */
+ tmp = rib->tree;
+ while ((tmp) && (tmp->depth < depth))
+ tmp = get_nxt_node(tmp, ip);
+ } else {
+ /*
+ * Resume after last: climb while we are a right child or the
+ * parent has no right child. A matching parent reached on the
+ * way up is reported before moving to another branch.
+ */
+ tmp = last;
+ while ((tmp->parent != NULL) && (is_right_node(tmp) ||
+ (tmp->parent->right == NULL))) {
+ tmp = tmp->parent;
+ if (is_valid_node(tmp) &&
+ (is_covered(tmp->ip, ip, depth) &&
+ (tmp->depth > depth)))
+ return tmp;
+ }
+ /* continue with the right sibling, or stop at the root */
+ tmp = (tmp->parent) ? tmp->parent->right : NULL;
+ }
+ /* descend, preferring the left child, and remember matching nodes */
+ while (tmp) {
+ if (is_valid_node(tmp) &&
+ (is_covered(tmp->ip, ip, depth) &&
+ (tmp->depth > depth))) {
+ prev = tmp;
+ if (flag == RTE_RIB_GET_NXT_COVER)
+ return prev;
+ }
+ tmp = (tmp->left) ? tmp->left : tmp->right;
+ }
+ return prev;
+}
+
+/*
+ * Remove the ip/depth route from the RIB.
+ * Does nothing when the route is not found (this includes the invalid
+ * argument cases rejected by rte_rib_lookup_exact()).
+ * The node loses its valid flag; then, starting from it and moving towards
+ * the root, every non-valid node with fewer than two children is unlinked,
+ * its only child (if any) takes its place, and the node goes back to the pool.
+ */
+void
+rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth)
+{
+ struct rte_rib_node *cur, *prev, *child;
+
+ cur = rte_rib_lookup_exact(rib, ip, depth);
+ if (cur == NULL)
+ return;
+
+ --rib->cur_routes;
+ cur->flag &= ~RTE_RIB_VALID_NODE;
+ while (!is_valid_node(cur)) {
+ /* two children: the node must stay as an intermediate node */
+ if ((cur->left != NULL) && (cur->right != NULL))
+ return;
+ /* at most one child: it replaces cur under cur's parent */
+ child = (cur->left == NULL) ? cur->right : cur->left;
+ if (child != NULL)
+ child->parent = cur->parent;
+ if (cur->parent == NULL) {
+ /* cur was the root */
+ rib->tree = child;
+ node_free(rib, cur);
+ return;
+ }
+ if (cur->parent->left == cur)
+ cur->parent->left = child;
+ else
+ cur->parent->right = child;
+ /* free cur and re-check its parent, which may now be prunable */
+ prev = cur;
+ cur = cur->parent;
+ node_free(rib, prev);
+ }
+}
+
+/*
+ * Insert the ip/depth prefix into the RIB.
+ *
+ * ip is masked to depth bits first. Returns the node holding the route, or
+ * NULL with rte_errno set to:
+ *   EINVAL - rib is NULL or depth exceeds RIB_MAXDEPTH,
+ *   EEXIST - the route is already present,
+ *   ENOMEM - the node pool is exhausted.
+ *
+ * Depending on what the descent finds, the new node is appended to an empty
+ * branch, an existing intermediate node with the same ip/depth is turned
+ * into a route, the new node becomes the parent of the closest node, or an
+ * intermediate node is created to join the new node and the closest node.
+ */
+struct rte_rib_node *
+rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth)
+{
+	struct rte_rib_node **tmp;
+	struct rte_rib_node *prev = NULL;
+	struct rte_rib_node *new_node = NULL;
+	struct rte_rib_node *common_node = NULL;
+	int d = 0;
+	uint32_t common_prefix;
+	uint8_t common_depth;
+
+	if ((rib == NULL) || (depth > RIB_MAXDEPTH)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	tmp = &rib->tree;
+	ip &= rte_rib_depth_to_mask(depth);
+	new_node = __rib_lookup_exact(rib, ip, depth);
+	if (new_node != NULL) {
+		rte_errno = EEXIST;
+		return NULL;
+	}
+
+	new_node = node_alloc(rib);
+	if (new_node == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	new_node->left = NULL;
+	new_node->right = NULL;
+	new_node->parent = NULL;
+	new_node->ip = ip;
+	new_node->depth = depth;
+	new_node->flag = RTE_RIB_VALID_NODE;
+
+	/* traverse down the tree to find matching node or closest matching */
+	while (1) {
+		/* insert as the last node in the branch */
+		if (*tmp == NULL) {
+			*tmp = new_node;
+			new_node->parent = prev;
+			++rib->cur_routes;
+			return *tmp;
+		}
+		/*
+		 * Intermediate node found.
+		 * Previous __rib_lookup_exact() returned NULL
+		 * but node with proper search criteria is found.
+		 * Validate intermediate node and return.
+		 */
+		if ((ip == (*tmp)->ip) && (depth == (*tmp)->depth)) {
+			node_free(rib, new_node);
+			(*tmp)->flag |= RTE_RIB_VALID_NODE;
+			++rib->cur_routes;
+			return *tmp;
+		}
+		d = (*tmp)->depth;
+		if ((d >= depth) || !is_covered(ip, (*tmp)->ip, d))
+			break;
+		prev = *tmp;
+		/* here d < depth <= 32, so the shift count is within 0..31 */
+		tmp = (ip & (1u << (31 - d))) ? &(*tmp)->right : &(*tmp)->left;
+	}
+	/* closest node found, new_node should be inserted in the middle */
+	common_depth = RTE_MIN(depth, (*tmp)->depth);
+	common_prefix = ip ^ (*tmp)->ip;
+	/*
+	 * __builtin_clz(0) is undefined. The XOR is zero when the new prefix
+	 * has the same address as the closest node but a different depth;
+	 * all 32 bits are common in that case.
+	 */
+	d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix);
+
+	common_depth = RTE_MIN(d, common_depth);
+	common_prefix = ip & rte_rib_depth_to_mask(common_depth);
+	if ((common_prefix == ip) && (common_depth == depth)) {
+		/* insert as a parent */
+		if ((*tmp)->ip & (1u << (31 - depth)))
+			new_node->right = *tmp;
+		else
+			new_node->left = *tmp;
+		new_node->parent = (*tmp)->parent;
+		(*tmp)->parent = new_node;
+		*tmp = new_node;
+	} else {
+		/* create intermediate node */
+		common_node = node_alloc(rib);
+		if (common_node == NULL) {
+			node_free(rib, new_node);
+			rte_errno = ENOMEM;
+			return NULL;
+		}
+		common_node->ip = common_prefix;
+		common_node->depth = common_depth;
+		common_node->flag = 0;
+		common_node->parent = (*tmp)->parent;
+		new_node->parent = common_node;
+		(*tmp)->parent = common_node;
+		/* the bit right below the common prefix selects the side */
+		if ((new_node->ip & (1u << (31 - common_depth))) == 0) {
+			common_node->left = new_node;
+			common_node->right = *tmp;
+		} else {
+			common_node->left = *tmp;
+			common_node->right = new_node;
+		}
+		*tmp = common_node;
+	}
+	++rib->cur_routes;
+	return new_node;
+}
+
+/*
+ * Copy the prefix address of node into *ip.
+ * Returns 0 on success, -1 with rte_errno = EINVAL when node or ip is NULL.
+ */
+int
+rte_rib_get_ip(struct rte_rib_node *node, uint32_t *ip)
+{
+	if (node != NULL && ip != NULL) {
+		*ip = node->ip;
+		return 0;
+	}
+
+	rte_errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Copy the prefix length of node into *depth.
+ * Returns 0 on success, -1 with rte_errno = EINVAL when node or depth is NULL.
+ */
+int
+rte_rib_get_depth(struct rte_rib_node *node, uint8_t *depth)
+{
+	if (node != NULL && depth != NULL) {
+		*depth = node->depth;
+		return 0;
+	}
+
+	rte_errno = EINVAL;
+	return -1;
+}
+
+/* Return the address of the node's extension area, or NULL for a NULL node. */
+void *
+rte_rib_get_ext(struct rte_rib_node *node)
+{
+	if (node == NULL)
+		return NULL;
+
+	return &node->ext[0];
+}
+
+/*
+ * Copy the next hop stored in node into *nh.
+ * Returns 0 on success, -1 with rte_errno = EINVAL when node or nh is NULL.
+ */
+int
+rte_rib_get_nh(struct rte_rib_node *node, uint64_t *nh)
+{
+	if (node != NULL && nh != NULL) {
+		*nh = node->nh;
+		return 0;
+	}
+
+	rte_errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Store nh as the next hop of node.
+ * Returns 0 on success, -1 with rte_errno = EINVAL when node is NULL.
+ */
+int
+rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh)
+{
+	if (node != NULL) {
+		node->nh = nh;
+		return 0;
+	}
+
+	rte_errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Create a RIB object.
+ *
+ * A mempool named "MP_<name>" holding conf->max_nodes objects of
+ * sizeof(struct rte_rib_node) + conf->ext_sz bytes is created on socket_id,
+ * then the RIB itself is allocated and linked into the RIB tailq.
+ *
+ * Returns the new object, or NULL on failure. rte_errno is set here to
+ * EINVAL (bad arguments), EEXIST (a RIB with this name is already
+ * registered) or ENOMEM (allocation failure); the mempool is released on
+ * every failure path taken after its creation.
+ */
+struct rte_rib *
+rte_rib_create(const char *name, int socket_id, struct rte_rib_conf *conf)
+{
+	char mem_name[RTE_RIB_NAMESIZE];
+	struct rte_rib *rib = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_rib_list *rib_list;
+	struct rte_mempool *node_pool;
+
+	/*
+	 * Check user arguments. max_nodes is a signed int, so negative
+	 * values are rejected too instead of being converted to a huge
+	 * unsigned pool size.
+	 */
+	if ((name == NULL) || (conf == NULL) ||
+			(conf->max_nodes <= 0)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
+	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
+		sizeof(struct rte_rib_node) + conf->ext_sz, 0, 0,
+		NULL, NULL, NULL, NULL, socket_id, 0);
+
+	if (node_pool == NULL) {
+		RTE_LOG(ERR, LPM,
+			"Can not allocate mempool for RIB %s\n", name);
+		return NULL;
+	}
+
+	snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
+	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, rib_list, next) {
+		rib = (struct rte_rib *)te->data;
+		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
+			break;
+	}
+	/* rib was only a cursor above; it must be NULL on the error paths */
+	rib = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, LPM,
+			"Can not allocate tailq entry for RIB %s\n", name);
+		rte_errno = ENOMEM;
+		goto exit;
+	}
+
+	/* Allocate memory to store the RIB data structures. */
+	rib = rte_zmalloc_socket(mem_name,
+		sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
+	if (rib == NULL) {
+		RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
+		rte_errno = ENOMEM;
+		goto free_te;
+	}
+
+	rte_strlcpy(rib->name, name, sizeof(rib->name));
+	rib->tree = NULL;
+	rib->max_nodes = conf->max_nodes;
+	rib->node_pool = node_pool;
+	te->data = (void *)rib;
+	TAILQ_INSERT_TAIL(rib_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return rib;
+
+free_te:
+	rte_free(te);
+exit:
+	rte_mcfg_tailq_write_unlock();
+	rte_mempool_free(node_pool);
+
+	return NULL;
+}
+
+/*
+ * Look up a RIB object by the name it was created with.
+ * Returns the object, or NULL with rte_errno set to EINVAL when name is
+ * NULL or to ENOENT when no RIB with that name is registered.
+ */
+struct rte_rib *
+rte_rib_find_existing(const char *name)
+{
+	struct rte_rib *rib = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_rib_list *rib_list;
+
+	/* strncmp() must not be handed a NULL string */
+	if (name == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, rib_list, next) {
+		rib = (struct rte_rib *) te->data;
+		if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
+			break;
+	}
+	rte_mcfg_tailq_read_unlock();
+
+	/* te is NULL when the loop ran to the end without a match */
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return rib;
+}
+
+/*
+ * Destroy a RIB object: unlink it from the RIB tailq, remove the routes
+ * reported by rte_rib_get_nxt(), then free the node pool, the RIB and its
+ * tailq entry. A NULL rib is ignored.
+ */
+void
+rte_rib_free(struct rte_rib *rib)
+{
+ struct rte_tailq_entry *te;
+ struct rte_rib_list *rib_list;
+ struct rte_rib_node *tmp = NULL;
+
+ if (rib == NULL)
+ return;
+
+ rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
+
+ rte_mcfg_tailq_write_lock();
+
+ /* find our tailq entry */
+ TAILQ_FOREACH(te, rib_list, next) {
+ if (te->data == (void *)rib)
+ break;
+ }
+ if (te != NULL)
+ TAILQ_REMOVE(rib_list, te, next);
+
+ rte_mcfg_tailq_write_unlock();
+
+ /*
+ * Walk all routes below 0.0.0.0/0 and remove them one by one.
+ * rte_rib_get_nxt() only reports nodes deeper than the given depth, so
+ * a /0 route is not visited here; the whole pool is freed below anyway.
+ * NOTE(review): tmp is handed back to rte_rib_get_nxt() after
+ * rte_rib_remove() may have returned that node to the pool - confirm
+ * that reading its links afterwards is intended.
+ */
+ while ((tmp = rte_rib_get_nxt(rib, 0, 0, tmp,
+ RTE_RIB_GET_NXT_ALL)) != NULL)
+ rte_rib_remove(rib, tmp->ip, tmp->depth);
+
+ rte_mempool_free(rib->node_pool);
+ rte_free(rib);
+ rte_free(te);
+}
diff --git a/lib/librte_rib/rte_rib.h b/lib/librte_rib/rte_rib.h
new file mode 100644
index 0000000..6b70de9
--- /dev/null
+++ b/lib/librte_rib/rte_rib.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _RTE_RIB_H_
+#define _RTE_RIB_H_
+
+/**
+ * @file
+ * Level compressed tree implementation for IPv4 Longest Prefix Match
+ */
+
+#include <rte_compat.h>
+
+/**
+ * Values accepted by the flag argument of rte_rib_get_nxt()
+ */
+enum {
+ /** report every more specific route found in the subtree */
+ RTE_RIB_GET_NXT_ALL,
+ /**
+ * report only the first more specific route met on a branch,
+ * even if that route has more specific routes below it
+ */
+ RTE_RIB_GET_NXT_COVER
+};
+
+struct rte_rib;
+struct rte_rib_node;
+
+/** RIB configuration structure */
+struct rte_rib_conf {
+ /**
+ * Size of extension block inside rte_rib_node.
+ * This space could be used to store additional user
+ * defined data.
+ */
+ size_t ext_sz;
+ /**
+ * Number of rte_rib_node objects in the node pool.
+ * rte_rib_create() rejects a value of 0.
+ */
+ int max_nodes;
+};
+
+/**
+ * Build an IPv4 network mask from a prefix length.
+ * The caller must make sure depth is not bigger than 32.
+ *
+ * @param depth
+ *  prefix length
+ * @return
+ *  IPv4 mask with the depth most significant bits set
+ */
+static inline uint32_t
+rte_rib_depth_to_mask(uint8_t depth)
+{
+	/* shift in 64 bits so that depth 0 gives an all-zero low word */
+	uint64_t wide = UINT64_MAX << (32 - depth);
+
+	return (uint32_t)wide;
+}
+
+/**
+ * Look up an IP in the RIB structure (longest prefix match)
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * IP to be looked up in the RIB
+ * @return
+ * pointer to struct rte_rib_node on success
+ * NULL otherwise
+ */
+__rte_experimental
+struct rte_rib_node *
+rte_rib_lookup(struct rte_rib *rib, uint32_t ip);
+
+/**
+ * Look up the closest less specific route in the RIB structure
+ *
+ * @param ent
+ * Pointer to struct rte_rib_node that represents target route
+ * @return
+ * pointer to struct rte_rib_node that represents
+ * less specific route on success
+ * NULL otherwise
+ */
+__rte_experimental
+struct rte_rib_node *
+rte_rib_lookup_parent(struct rte_rib_node *ent);
+
+/**
+ * Lookup prefix into the RIB structure
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * net to be looked up in the RIB
+ * @param depth
+ * prefix length
+ * @return
+ * pointer to struct rte_rib_node on success
+ * NULL otherwise
+ */
+__rte_experimental
+struct rte_rib_node *
+rte_rib_lookup_exact(struct rte_rib *rib, uint32_t ip, uint8_t depth);
+
+/**
+ * Retrieve next more specific prefix from the RIB
+ * that is covered by ip/depth supernet in an ascending order
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * net address of supernet prefix that covers returned more specific prefixes
+ * @param depth
+ * supernet prefix length
+ * @param last
+ * pointer to the last returned prefix to get next prefix
+ * or
+ * NULL to get first more specific prefix
+ * @param flag
+ * -RTE_RIB_GET_NXT_ALL
+ * get all prefixes from subtrie
+ * -RTE_RIB_GET_NXT_COVER
+ * get only the first more specific prefix even if it has more specifics
+ * @return
+ * pointer to the next more specific prefix
+ * NULL if there are no prefixes left
+ */
+__rte_experimental
+struct rte_rib_node *
+rte_rib_get_nxt(struct rte_rib *rib, uint32_t ip, uint8_t depth,
+ struct rte_rib_node *last, int flag);
+
+/**
+ * Remove prefix from the RIB
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * net to be removed from the RIB
+ * @param depth
+ * prefix length
+ */
+__rte_experimental
+void
+rte_rib_remove(struct rte_rib *rib, uint32_t ip, uint8_t depth);
+
+/**
+ * Insert prefix into the RIB
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * net to be inserted to the RIB
+ * @param depth
+ * prefix length
+ * @return
+ * pointer to new rte_rib_node on success
+ * NULL otherwise
+ */
+__rte_experimental
+struct rte_rib_node *
+rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth);
+
+/**
+ * Get an ip from rte_rib_node
+ *
+ * @param node
+ * pointer to the rib node
+ * @param ip
+ * pointer to the ip to save
+ * @return
+ * 0 on success.
+ * -1 on failure with rte_errno indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_rib_get_ip(struct rte_rib_node *node, uint32_t *ip);
+
+/**
+ * Get a depth from rte_rib_node
+ *
+ * @param node
+ * pointer to the rib node
+ * @param depth
+ * pointer to the depth to save
+ * @return
+ * 0 on success.
+ * -1 on failure with rte_errno indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_rib_get_depth(struct rte_rib_node *node, uint8_t *depth);
+
+/**
+ * Get ext field from the rib node
+ * It is the caller's responsibility to make sure there is enough space
+ * for the ext field inside the rib node.
+ *
+ * @param node
+ * pointer to the rib node
+ * @return
+ * pointer to the ext
+ */
+__rte_experimental
+void *
+rte_rib_get_ext(struct rte_rib_node *node);
+
+/**
+ * Get nexthop from the rib node
+ *
+ * @param node
+ * pointer to the rib node
+ * @param nh
+ * pointer to the nexthop to save
+ * @return
+ * 0 on success.
+ * -1 on failure with rte_errno indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_rib_get_nh(struct rte_rib_node *node, uint64_t *nh);
+
+/**
+ * Set nexthop into the rib node
+ *
+ * @param node
+ * pointer to the rib node
+ * @param nh
+ * nexthop value to set to the rib node
+ * @return
+ * 0 on success.
+ * -1 on failure with rte_errno indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_rib_set_nh(struct rte_rib_node *node, uint64_t nh);
+
+/**
+ * Create RIB
+ *
+ * @param name
+ * RIB name
+ * @param socket_id
+ * NUMA socket ID for RIB table memory allocation
+ * @param conf
+ * Structure containing the configuration
+ * @return
+ * Handle to RIB object on success
+ * NULL otherwise with rte_errno indicating reason for failure.
+ */
+__rte_experimental
+struct rte_rib *
+rte_rib_create(const char *name, int socket_id, struct rte_rib_conf *conf);
+
+/**
+ * Find an existing RIB object and return a pointer to it.
+ *
+ * @param name
+ * Name of the rib object as passed to rte_rib_create()
+ * @return
+ * Pointer to RIB object on success
+ * NULL otherwise with rte_errno indicating reason for failure.
+ */
+__rte_experimental
+struct rte_rib *
+rte_rib_find_existing(const char *name);
+
+/**
+ * Free a RIB object.
+ *
+ * @param rib
+ * RIB object handle
+ * @return
+ * None
+ */
+__rte_experimental
+void
+rte_rib_free(struct rte_rib *rib);
+
+#endif /* _RTE_RIB_H_ */
diff --git a/lib/librte_rib/rte_rib_version.map b/lib/librte_rib/rte_rib_version.map
new file mode 100644
index 0000000..1432a22
--- /dev/null
+++ b/lib/librte_rib/rte_rib_version.map
@@ -0,0 +1,20 @@
+EXPERIMENTAL {
+ global:
+
+ rte_rib_create;
+ rte_rib_find_existing;
+ rte_rib_free;
+ rte_rib_get_depth;
+ rte_rib_get_ext;
+ rte_rib_get_ip;
+ rte_rib_get_nh;
+ rte_rib_get_nxt;
+ rte_rib_insert;
+ rte_rib_lookup;
+ rte_rib_lookup_parent;
+ rte_rib_lookup_exact;
+ rte_rib_set_nh;
+ rte_rib_remove;
+
+ local: *;
+};
diff --git a/lib/meson.build b/lib/meson.build
index f29eb41..a7eded6 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -22,7 +22,7 @@ libraries = [
'gro', 'gso', 'ip_frag', 'jobstats',
'kni', 'latencystats', 'lpm', 'member',
'power', 'pdump', 'rawdev',
- 'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',
+ 'rcu', 'rib', 'reorder', 'sched', 'security', 'stack', 'vhost',
# ipsec lib depends on net, crypto and security
'ipsec',
# add pkt framework libs which use other libs from above
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1f5c748..29d901e 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -45,6 +45,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PDUMP) += -lrte_pdump
_LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor
_LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag
_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter
+_LDLIBS-$(CONFIG_RTE_LIBRTE_RIB) += -lrte_rib
_LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm
_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl
_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed