summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIntel <intel.com>2013-06-03 00:00:00 +0000
committerThomas Monjalon <thomas.monjalon@6wind.com>2013-09-17 14:09:21 +0200
commitde3cfa2c9823a7e0391d4f4a355e2d78b83eec62 (patch)
tree0af4380bb4b07f872b3ea408ebeb80fc6a3f8a87
parente6541fdec8b2407e46569147be6887ca3ad4a704 (diff)
downloaddpdk-de3cfa2c9823.zip
dpdk-de3cfa2c9823.tar.gz
dpdk-de3cfa2c9823.tar.xz
sched: initial import
Signed-off-by: Intel
-rwxr-xr-xapp/test/Makefile2
-rwxr-xr-xapp/test/autotest_data.py16
-rwxr-xr-xapp/test/commands.c6
-rwxr-xr-xapp/test/test.h2
-rw-r--r--app/test/test_red.c1890
-rwxr-xr-xapp/test/test_sched.c244
-rw-r--r--config/defconfig_i686-default-linuxapp-gcc10
-rw-r--r--config/defconfig_i686-default-linuxapp-icc10
-rw-r--r--config/defconfig_x86_64-default-linuxapp-gcc11
-rw-r--r--config/defconfig_x86_64-default-linuxapp-icc10
-rwxr-xr-xexamples/qos_sched/Makefile58
-rwxr-xr-xexamples/qos_sched/app_thread.c302
-rwxr-xr-xexamples/qos_sched/args.c467
-rwxr-xr-xexamples/qos_sched/cfg_file.c631
-rwxr-xr-xexamples/qos_sched/cfg_file.h103
-rwxr-xr-xexamples/qos_sched/init.c385
-rwxr-xr-xexamples/qos_sched/main.c246
-rwxr-xr-xexamples/qos_sched/main.h186
-rw-r--r--examples/qos_sched/profile.cfg109
-rw-r--r--lib/Makefile1
-rw-r--r--lib/librte_eal/common/include/rte_log.h1
-rw-r--r--lib/librte_mbuf/rte_mbuf.h1
-rw-r--r--lib/librte_sched/Makefile56
-rw-r--r--lib/librte_sched/rte_approx.c197
-rw-r--r--lib/librte_sched/rte_approx.h76
-rw-r--r--lib/librte_sched/rte_bitmap.h505
-rw-r--r--lib/librte_sched/rte_red.c160
-rw-r--r--lib/librte_sched/rte_red.h454
-rw-r--r--lib/librte_sched/rte_sched.c2129
-rw-r--r--lib/librte_sched/rte_sched.h446
-rw-r--r--lib/librte_sched/rte_sched_common.h130
-rw-r--r--mk/rte.app.mk6
32 files changed, 8847 insertions, 3 deletions
diff --git a/app/test/Makefile b/app/test/Makefile
index 6ba18e4..4fecdb6 100755
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -85,6 +85,8 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_ipaddr.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_cirbuf.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_string.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_lib.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_red.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_sched.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_meter.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_pm.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_acl.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 9bd436b..f2f9965 100755
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -271,7 +271,7 @@ parallel_test_group_list = [
},
{
"Prefix": "group_6",
- "Memory" : all_sockets(588),
+ "Memory" : all_sockets(600),
"Tests" :
[
{
@@ -297,7 +297,13 @@ parallel_test_group_list = [
"Command" : "prefetch_autotest",
"Func" : default_autotest,
"Report" : None,
- },
+ },
+ {
+ "Name" :"Red autotest",
+ "Command" : "red_autotest",
+ "Func" :default_autotest,
+ "Report" :None,
+ },
]
},
{
@@ -317,6 +323,12 @@ parallel_test_group_list = [
"Func" : default_autotest,
"Report" : None,
},
+ {
+ "Name" :"Sched autotest",
+ "Command" : "sched_autotest",
+ "Func" :default_autotest,
+ "Report" :None,
+ },
]
},
]
diff --git a/app/test/commands.c b/app/test/commands.c
index 2438433..c7ac1e4 100755
--- a/app/test/commands.c
+++ b/app/test/commands.c
@@ -167,6 +167,10 @@ static void cmd_autotest_parsed(void *parsed_result,
ret |= test_memcpy_perf();
if (all || !strcmp(res->autotest, "func_reentrancy_autotest"))
ret |= test_func_reentrancy();
+ if (all || !strcmp(res->autotest, "red_autotest"))
+ ret |= test_red();
+ if (all || !strcmp(res->autotest, "sched_autotest"))
+ ret |= test_sched();
if (all || !strcmp(res->autotest, "meter_autotest"))
ret |= test_meter();
if (all || !strcmp(res->autotest, "pm_autotest"))
@@ -203,7 +207,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
"version_autotest#eal_fs_autotest#"
"cmdline_autotest#func_reentrancy_autotest#"
"mempool_perf_autotest#hash_perf_autotest#"
- "meter_autotest#"
+ "red_autotest#meter_autotest#sched_autotest#"
"memcpy_perf_autotest#pm_autotest#"
"acl_autotest#power_autotest#"
"all_autotests");
diff --git a/app/test/test.h b/app/test/test.h
index 75df8d0..6bac209 100755
--- a/app/test/test.h
+++ b/app/test/test.h
@@ -84,6 +84,8 @@ int test_version(void);
int test_eal_fs(void);
int test_cmdline(void);
int test_func_reentrancy(void);
+int test_red(void);
+int test_sched(void);
int test_meter(void);
int test_pmac_pm(void);
int test_pmac_acl(void);
diff --git a/app/test/test_red.c b/app/test/test_red.c
new file mode 100644
index 0000000..f083349
--- /dev/null
+++ b/app/test/test_red.c
@@ -0,0 +1,1890 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+#include <cmdline_parse.h>
+
+#include "test.h"
+
+#ifdef RTE_LIBRTE_SCHED
+
+#include <rte_red.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#pragma warning(disable:181) /* Arg incompatible with format string */
+#endif
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+#define TEST_HZ_PER_KHZ 1000
+#define TEST_NSEC_MARGIN 500 /**< nanosecond margin when calculating clk freq */
+
+#define MAX_QEMPTY_TIME_MSEC 50000
+#define MSEC_PER_SEC 1000 /**< Milli-seconds per second */
+#define USEC_PER_MSEC 1000 /**< Micro-seconds per milli-second */
+#define USEC_PER_SEC 1000000 /**< Micro-seconds per second */
+
+/**< structures for testing rte_red performance and function */
+struct test_rte_red_config { /**< Test structure for RTE_RED config */
+ struct rte_red_config *rconfig; /**< RTE_RED configuration parameters */
+ uint8_t num_cfg; /**< Number of RTE_RED configs to test */
+ uint8_t *wq_log2; /**< Test wq_log2 value to use */
+ uint32_t min_th; /**< Queue minimum threshold */
+ uint32_t max_th; /**< Queue maximum threshold */
+ uint8_t *maxp_inv; /**< Inverse mark probability */
+};
+
+struct test_queue { /**< Test structure for RTE_RED Queues */
+ struct rte_red *rdata; /**< RTE_RED runtime data */
+ uint32_t num_queues; /**< Number of RTE_RED queues to test */
+ uint32_t *qconfig; /**< Configuration of RTE_RED queues for test */
+ uint32_t *q; /**< Queue size */
+ uint32_t q_ramp_up; /**< Num of enqueues to ramp up the queue */
+ uint32_t avg_ramp_up; /**< Average num of enqueues to ramp up the queue */
+ uint32_t avg_tolerance; /**< Tolerance in queue average */
+ double drop_tolerance; /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var { /**< Test variables used for testing RTE_RED */
+ uint32_t wait_usec; /**< Micro second wait interval */
+ uint32_t num_iterations; /**< Number of test iterations */
+ uint32_t num_ops; /**< Number of test operations */
+ uint64_t clk_freq; /**< CPU clock frequency */
+ uint32_t sleep_sec; /**< Seconds to sleep */
+ uint32_t *dropped; /**< Test operations dropped */
+ uint32_t *enqueued; /**< Test operations enqueued */
+};
+
+struct test_config { /**< Master test structure for RTE_RED */
+ const char *ifname; /**< Interface name */
+ const char *msg; /**< Test message for display */
+ const char *htxt; /**< Header txt display for result output */
+ struct test_rte_red_config *tconfig; /**< Test structure for RTE_RED config */
+ struct test_queue *tqueue; /**< Test structure for RTE_RED Queues */
+ struct test_var *tvar; /**< Test variables used for testing RTE_RED */
+ uint32_t *tlevel; /**< Queue levels */
+};
+
+enum test_result {
+ FAIL = 0,
+ PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+ struct test_config *testcfg;
+ enum test_result (*testfn)(struct test_config *);
+};
+
+struct rdtsc_prof {
+ uint64_t clk_start;
+ uint64_t clk_min; /**< min clocks */
+ uint64_t clk_max; /**< max clocks */
+ uint64_t clk_avgc; /**< count to calc average */
+ double clk_avg; /**< cumulative sum to calc average */
+ const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte = 0;
+static double pkt_time_usec = 0;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+ double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+ inv_cycles_per_byte = 1.0 / cycles_per_byte;
+ pkt_time_usec = 1000000.0 / ((double)port_speed_bytes / (double)RTE_RED_S);
+}
+
+static uint64_t get_port_ts(void)
+{
+ return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+ p->clk_min = (uint64_t)(-1LL);
+ p->clk_max = 0;
+ p->clk_avg = 0;
+ p->clk_avgc = 0;
+ p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+ asm( "cpuid" : : : "%eax", "%ebx", "%ecx", "%edx" );
+ p->clk_start = rte_rdtsc();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+ uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+ p->clk_avgc++;
+ p->clk_avg += (double) clk_start;
+
+ if (clk_start > p->clk_max)
+ p->clk_max = clk_start;
+ if (clk_start < p->clk_min)
+ p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+ if (p->clk_avgc>0) {
+ printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64 ", max=%" PRIu64 ", avg=%.1f\n",
+ p->name,
+ p->clk_avgc,
+ p->clk_min,
+ p->clk_max,
+ (p->clk_avg / ((double) p->clk_avgc)));
+ }
+}
+
+static uint32_t rte_red_get_avg_int(const struct rte_red_config *red_cfg,
+ struct rte_red *red)
+{
+ /**
+ * scale by 1/n and convert from fixed-point to integer
+ */
+ return red->avg >> (RTE_RED_SCALING + red_cfg->wq_log2);
+}
+
+static double rte_red_get_avg_float(const struct rte_red_config *red_cfg,
+ struct rte_red *red)
+{
+ /**
+ * scale by 1/n and convert from fixed-point to floating-point
+ */
+ return ldexp((double)red->avg, -(RTE_RED_SCALING + red_cfg->wq_log2));
+}
+
+static void rte_red_set_avg_int(const struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ uint32_t avg)
+{
+ /**
+ * scale by n and convert from integer to fixed-point
+ */
+ red->avg = avg << (RTE_RED_SCALING + red_cfg->wq_log2);
+}
+
+static double calc_exp_avg_on_empty(double avg, uint32_t n, uint32_t time_diff)
+{
+ return avg * pow((1.0 - 1.0 / (double)n), (double)time_diff / pkt_time_usec);
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+ return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ * calculate the drop probability
+ */
+static double calc_drop_prob(uint32_t min_th, uint32_t max_th,
+ uint32_t maxp_inv, uint32_t avg)
+{
+ double drop_prob = 0.0;
+
+ if (avg < min_th) {
+ drop_prob = 0.0;
+ } else if (avg < max_th) {
+ drop_prob = (1.0 / (double)maxp_inv)
+ * ((double)(avg - min_th)
+ / (double)(max_th - min_th));
+ } else {
+ drop_prob = 1.0;
+ }
+ return (drop_prob);
+}
+
+/**
+ * check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob, double tolerance)
+{
+ double abs_diff = 0.0;
+ int ret = 1;
+
+ abs_diff = fabs(drop_rate - drop_prob);
+ if ((int)abs_diff == 0) {
+ *diff = 0.0;
+ } else {
+ *diff = (abs_diff / drop_prob) * 100.0;
+ if (*diff > tolerance) {
+ ret = 0;
+ }
+ }
+ return (ret);
+}
+
+/**
+ * check if average queue size is within tolerance
+ */
+static int check_avg(double *diff, double avg, double exp_avg, double tolerance)
+{
+ double abs_diff = 0.0;
+ int ret = 1;
+
+ abs_diff = fabs(avg - exp_avg);
+ if ((int)abs_diff == 0) {
+ *diff = 0.0;
+ } else {
+ *diff = (abs_diff / exp_avg) * 100.0;
+ if (*diff > tolerance) {
+ ret = 0;
+ }
+ }
+ return (ret);
+}
+
+/**
+ * get the clk frequency in Hz
+ */
+static uint64_t get_machclk_freq(void)
+{
+ uint64_t start = 0;
+ uint64_t end = 0;
+ uint64_t diff = 0;
+ uint64_t clk_freq_hz = 0;
+ struct timespec tv_start = {0, 0}, tv_end = {0, 0};
+ struct timespec req = {0, 0};
+
+ req.tv_sec = 1;
+ req.tv_nsec = 0;
+
+ clock_gettime(CLOCK_REALTIME, &tv_start);
+ start = rte_rdtsc();
+
+ if (nanosleep(&req, NULL) != 0) {
+ perror("get_machclk_freq()");
+ exit(EXIT_FAILURE);
+ }
+
+ clock_gettime(CLOCK_REALTIME, &tv_end);
+ end = rte_rdtsc();
+
+ diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * USEC_PER_SEC
+ + ((tv_end.tv_nsec - tv_start.tv_nsec + TEST_NSEC_MARGIN) /
+ USEC_PER_MSEC); /**< diff is in micro secs */
+
+ if (diff == 0)
+ return(0);
+
+ clk_freq_hz = ((end - start) * USEC_PER_SEC / diff);
+ return (clk_freq_hz);
+}
+
+/**
+ * initialize the test rte_red config
+ */
+static enum test_result
+test_rte_red_init(struct test_config *tcfg)
+{
+ unsigned i = 0;
+
+ tcfg->tvar->clk_freq = get_machclk_freq();
+ init_port_ts( tcfg->tvar->clk_freq );
+
+ for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+ if (rte_red_config_init(&tcfg->tconfig->rconfig[i],
+ (uint16_t)tcfg->tconfig->wq_log2[i],
+ (uint16_t)tcfg->tconfig->min_th,
+ (uint16_t)tcfg->tconfig->max_th,
+ (uint16_t)tcfg->tconfig->maxp_inv[i]) != 0) {
+ return(FAIL);
+ }
+ }
+
+ *tcfg->tqueue->q = 0;
+ *tcfg->tvar->dropped = 0;
+ *tcfg->tvar->enqueued = 0;
+ return(PASS);
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_actual_qsize(struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ uint32_t *q,
+ uint32_t level,
+ uint32_t attempts)
+{
+ uint32_t i = 0;
+
+ for (i = 0; i < attempts; i++) {
+ int ret = 0;
+
+ /**
+ * enqueue
+ */
+ ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts() );
+ if (ret == 0) {
+ if (++(*q) >= level)
+ break;
+ }
+ }
+ /**
+ * check if target actual queue size has been reached
+ */
+ if (*q != level)
+ return (-1);
+ /**
+ * success
+ */
+ return (0);
+}
+
+/**
+ * enqueue until average queue size reaches target level
+ */
+static int
+increase_average_qsize(struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ uint32_t *q,
+ uint32_t level,
+ uint32_t num_ops)
+{
+ uint32_t avg = 0;
+ uint32_t i = 0;
+
+ for (i = 0; i < num_ops; i++) {
+ /**
+ * enqueue
+ */
+ rte_red_enqueue(red_cfg, red, *q, get_port_ts());
+ }
+ /**
+ * check if target average queue size has been reached
+ */
+ avg = rte_red_get_avg_int(red_cfg, red);
+ if (avg != level)
+ return (-1);
+ /**
+ * success
+ */
+ return (0);
+}
+
+/**
+ * setup default values for the functional test structures
+ */
+static struct rte_red_config ft_wrconfig[1];
+static struct rte_red ft_rtdata[1];
+static uint8_t ft_wq_log2[] = {9};
+static uint8_t ft_maxp_inv[] = {10};
+static uint32_t ft_qconfig[] = {0, 0, 1, 1};
+static uint32_t ft_q[] ={0};
+static uint32_t ft_dropped[] ={0};
+static uint32_t ft_enqueued[] ={0};
+
+static struct test_rte_red_config ft_tconfig = {
+ .rconfig = ft_wrconfig,
+ .num_cfg = DIM(ft_wrconfig),
+ .wq_log2 = ft_wq_log2,
+ .min_th = 32,
+ .max_th = 128,
+ .maxp_inv = ft_maxp_inv,
+};
+
+static struct test_queue ft_tqueue = {
+ .rdata = ft_rtdata,
+ .num_queues = DIM(ft_rtdata),
+ .qconfig = ft_qconfig,
+ .q = ft_q,
+ .q_ramp_up = 1000000,
+ .avg_ramp_up = 1000000,
+ .avg_tolerance = 5, /* 5 percent */
+ .drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_var ft_tvar = {
+ .wait_usec = 250000,
+ .num_iterations = 20,
+ .num_ops = 10000,
+ .clk_freq = 0,
+ .dropped = ft_dropped,
+ .enqueued = ft_enqueued,
+ .sleep_sec = (MAX_QEMPTY_TIME_MSEC / MSEC_PER_SEC) + 2,
+};
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void enqueue_dequeue_func(struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ uint32_t *q,
+ uint32_t num_ops,
+ uint32_t *enqueued,
+ uint32_t *dropped)
+{
+ uint32_t i = 0;
+
+ for (i = 0; i < num_ops; i++) {
+ int ret = 0;
+
+ /**
+ * enqueue
+ */
+ ret = rte_red_enqueue(red_cfg, red, *q, get_port_ts());
+ if (ret == 0)
+ (*enqueued)++;
+ else
+ (*dropped)++;
+ }
+}
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft1_tlevels[] = {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test1_config = {
+ .ifname = "functional test 1 interface",
+ .msg = "functional test 1 : use one rte_red configuration,\n"
+ " increase average queue size to various levels,\n"
+ " compare drop rate to drop probability\n\n",
+ .htxt = " "
+ "avg queue size "
+ "enqueued "
+ "dropped "
+ "drop prob % "
+ "drop rate % "
+ "diff % "
+ "tolerance % "
+ "\n",
+ .tconfig = &ft_tconfig,
+ .tqueue = &ft_tqueue,
+ .tvar = &ft_tvar,
+ .tlevel = ft1_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ uint32_t i = 0;
+
+ printf("%s", tcfg->msg);
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+
+ printf("%s", tcfg->htxt);
+
+ for (i = 0; i < DIM(ft1_tlevels); i++) {
+ const char *label = NULL;
+ uint32_t avg = 0;
+ double drop_rate = 0.0;
+ double drop_prob = 0.0;
+ double diff = 0.0;
+
+ /**
+ * reset rte_red run-time data
+ */
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+ *tcfg->tvar->enqueued = 0;
+ *tcfg->tvar->dropped = 0;
+
+ if (increase_actual_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ tcfg->tlevel[i],
+ tcfg->tqueue->q_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ if (increase_average_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ tcfg->tlevel[i],
+ tcfg->tqueue->avg_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ enqueue_dequeue_func(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ tcfg->tvar->num_ops,
+ tcfg->tvar->enqueued,
+ tcfg->tvar->dropped);
+
+ avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+ if (avg != tcfg->tlevel[i]) {
+ fprintf(stderr, "Fail: avg != level\n");
+ result = FAIL;
+ }
+
+ drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+ drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+ *tcfg->tconfig->maxp_inv, tcfg->tlevel[i]);
+ if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+ result = FAIL;
+
+ if (tcfg->tlevel[i] == tcfg->tconfig->min_th)
+ label = "min thresh: ";
+ else if (tcfg->tlevel[i] == tcfg->tconfig->max_th)
+ label = "max thresh: ";
+ else
+ label = " ";
+ printf("%s%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+ label, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+ drop_prob * 100.0, drop_rate * 100.0, diff,
+ (double)tcfg->tqueue->drop_tolerance);
+ }
+out:
+ return (result);
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint8_t ft2_wq_log2[] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9};
+static uint8_t ft2_maxp_inv[] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100};
+static struct rte_red_config ft2_rconfig[10];
+
+static struct test_rte_red_config ft2_tconfig = {
+ .rconfig = ft2_rconfig,
+ .num_cfg = DIM(ft2_rconfig),
+ .wq_log2 = ft2_wq_log2,
+ .min_th = 32,
+ .max_th = 128,
+ .maxp_inv = ft2_maxp_inv,
+};
+
+static struct test_config func_test2_config = {
+ .ifname = "functional test 2 interface",
+ .msg = "functional test 2 : use several RED configurations,\n"
+ " increase average queue size to just below maximum threshold,\n"
+ " compare drop rate to drop probability\n\n",
+ .htxt = "RED config "
+ "avg queue size "
+ "min threshold "
+ "max threshold "
+ "drop prob % "
+ "drop rate % "
+ "diff % "
+ "tolerance % "
+ "\n",
+ .tconfig = &ft2_tconfig,
+ .tqueue = &ft_tqueue,
+ .tvar = &ft_tvar,
+ .tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ double prev_drop_rate = 1.0;
+ uint32_t i = 0;
+
+ printf("%s", tcfg->msg);
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+ if (increase_actual_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ *tcfg->tlevel,
+ tcfg->tqueue->q_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ if (increase_average_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ *tcfg->tlevel,
+ tcfg->tqueue->avg_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+ printf("%s", tcfg->htxt);
+
+ for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+ uint32_t avg = 0;
+ double drop_rate = 0.0;
+ double drop_prob = 0.0;
+ double diff = 0.0;
+
+ *tcfg->tvar->dropped = 0;
+ *tcfg->tvar->enqueued = 0;
+
+ enqueue_dequeue_func(&tcfg->tconfig->rconfig[i],
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ tcfg->tvar->num_ops,
+ tcfg->tvar->enqueued,
+ tcfg->tvar->dropped);
+
+ avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[i], tcfg->tqueue->rdata);
+ if (avg != *tcfg->tlevel)
+ result = FAIL;
+
+ drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+ drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+ tcfg->tconfig->maxp_inv[i], *tcfg->tlevel);
+ if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+ result = FAIL;
+ /**
+ * drop rate should decrease as maxp_inv increases
+ */
+ if (drop_rate > prev_drop_rate)
+ result = FAIL;
+ prev_drop_rate = drop_rate;
+
+ printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+ i, avg, tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+ drop_prob * 100.0, drop_rate * 100.0, diff,
+ (double)tcfg->tqueue->drop_tolerance);
+ }
+out:
+ return (result);
+}
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevel[] = {1022};
+
+static struct test_rte_red_config ft3_tconfig = {
+ .rconfig = ft_wrconfig,
+ .num_cfg = DIM(ft_wrconfig),
+ .wq_log2 = ft_wq_log2,
+ .min_th = 32,
+ .max_th = 1023,
+ .maxp_inv = ft_maxp_inv,
+};
+
+static struct test_config func_test3_config = {
+ .ifname = "functional test 3 interface",
+ .msg = "functional test 3 : use one RED configuration,\n"
+ " increase average queue size to target level,\n"
+ " dequeue all packets until queue is empty,\n"
+ " confirm that average queue size is computed correctly while queue is empty\n\n",
+ .htxt = "q avg before "
+ "q avg after "
+ "expected "
+ "difference % "
+ "tolerance % "
+ "result "
+ "\n",
+ .tconfig = &ft3_tconfig,
+ .tqueue = &ft_tqueue,
+ .tvar = &ft_tvar,
+ .tlevel = ft3_tlevel,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ uint32_t i = 0;
+
+ printf("%s", tcfg->msg);
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+ if (increase_actual_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ *tcfg->tlevel,
+ tcfg->tqueue->q_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ if (increase_average_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ *tcfg->tlevel,
+ tcfg->tqueue->avg_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ printf("%s", tcfg->htxt);
+
+ for (i = 0; i < tcfg->tvar->num_iterations; i++) {
+ double avg_before = 0;
+ double avg_after = 0;
+ double exp_avg = 0;
+ double diff = 0.0;
+
+ avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+
+ /**
+ * empty the queue
+ */
+ *tcfg->tqueue->q = 0;
+ rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+ rte_delay_us(tcfg->tvar->wait_usec);
+
+ /**
+ * enqueue one packet to recalculate average queue size
+ */
+ if (rte_red_enqueue(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ *tcfg->tqueue->q,
+ get_port_ts()) == 0) {
+ (*tcfg->tqueue->q)++;
+ } else {
+ printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__);
+ result = FAIL;
+ }
+
+ exp_avg = calc_exp_avg_on_empty(avg_before,
+ (1 << *tcfg->tconfig->wq_log2),
+ tcfg->tvar->wait_usec);
+ avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata);
+ if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+ result = FAIL;
+
+ printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+ avg_before, avg_after, exp_avg, diff,
+ (double)tcfg->tqueue->avg_tolerance,
+ diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail");
+ }
+out:
+ return (result);
+}
+
+/**
+ * Test F4: functional test 4
+ */
+static uint32_t ft4_tlevel[] = {1022};
+static uint8_t ft4_wq_log2[] = {11};
+
+static struct test_rte_red_config ft4_tconfig = {
+ .rconfig = ft_wrconfig,
+ .num_cfg = DIM(ft_wrconfig),
+ .min_th = 32,
+ .max_th = 1023,
+ .wq_log2 = ft4_wq_log2,
+ .maxp_inv = ft_maxp_inv,
+};
+
+static struct test_queue ft4_tqueue = {
+ .rdata = ft_rtdata,
+ .num_queues = DIM(ft_rtdata),
+ .qconfig = ft_qconfig,
+ .q = ft_q,
+ .q_ramp_up = 1000000,
+ .avg_ramp_up = 1000000,
+ .avg_tolerance = 0, /* 0 percent */
+ .drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_config func_test4_config = {
+ .ifname = "functional test 4 interface",
+ .msg = "functional test 4 : use one RED configuration,\n"
+ " increase average queue size to target level,\n"
+ " dequeue all packets until queue is empty,\n"
+ " confirm that average queue size is computed correctly while\n"
+ " queue is empty for more than 50 sec,\n"
+ " (this test takes 52 sec to run)\n\n",
+ .htxt = "q avg before "
+ "q avg after "
+ "expected "
+ "difference % "
+ "tolerance % "
+ "result "
+ "\n",
+ .tconfig = &ft4_tconfig,
+ .tqueue = &ft4_tqueue,
+ .tvar = &ft_tvar,
+ .tlevel = ft4_tlevel,
+};
+
+static enum test_result func_test4(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ uint64_t time_diff = 0;
+ uint64_t start = 0;
+ double avg_before = 0.0;
+ double avg_after = 0.0;
+ double exp_avg = 0.0;
+ double diff = 0.0;
+
+ printf("%s", tcfg->msg);
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+ if (increase_actual_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ *tcfg->tlevel,
+ tcfg->tqueue->q_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ if (increase_average_qsize(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ *tcfg->tlevel,
+ tcfg->tqueue->avg_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ printf("%s", tcfg->htxt);
+
+ avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+
+ /**
+ * empty the queue
+ */
+ *tcfg->tqueue->q = 0;
+ rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+ /**
+ * record empty time locally
+ */
+ start = rte_rdtsc();
+
+ sleep(tcfg->tvar->sleep_sec);
+
+ /**
+ * enqueue one packet to recalculate average queue size
+ */
+ if (rte_red_enqueue(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ *tcfg->tqueue->q,
+ get_port_ts()) != 0) {
+ result = FAIL;
+ goto out;
+ }
+ (*tcfg->tqueue->q)++;
+
+ /**
+ * calculate how long queue has been empty
+ */
+ time_diff = ((rte_rdtsc() - start) / tcfg->tvar->clk_freq)
+ * MSEC_PER_SEC;
+ if (time_diff < MAX_QEMPTY_TIME_MSEC) {
+ /**
+ * this could happen if sleep was interrupted for some reason
+ */
+ result = FAIL;
+ goto out;
+ }
+
+ /**
+ * confirm that average queue size is now at expected level
+ */
+ exp_avg = 0.0;
+ avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+ if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+ result = FAIL;
+
+ printf("%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+ avg_before, avg_after, exp_avg,
+ diff, (double)tcfg->tqueue->avg_tolerance,
+ diff <= (double)tcfg->tqueue->avg_tolerance ? "pass" : "fail");
+out:
+ return (result);
+}
+
+/**
+ * Test F5: functional test 5
+ */
+static uint32_t ft5_tlevel[] = {127};
+static uint8_t ft5_wq_log2[] = {9, 8};
+static uint8_t ft5_maxp_inv[] = {10, 20};
+static struct rte_red_config ft5_config[2];
+static struct rte_red ft5_data[4];
+static uint32_t ft5_q[4];
+static uint32_t ft5_dropped[] = {0, 0, 0, 0};
+static uint32_t ft5_enqueued[] = {0, 0, 0, 0};
+
+static struct test_rte_red_config ft5_tconfig = {
+ .rconfig = ft5_config,
+ .num_cfg = DIM(ft5_config),
+ .min_th = 32,
+ .max_th = 128,
+ .wq_log2 = ft5_wq_log2,
+ .maxp_inv = ft5_maxp_inv,
+};
+
+static struct test_queue ft5_tqueue = {
+ .rdata = ft5_data,
+ .num_queues = DIM(ft5_data),
+ .qconfig = ft_qconfig,
+ .q = ft5_q,
+ .q_ramp_up = 1000000,
+ .avg_ramp_up = 1000000,
+ .avg_tolerance = 5, /* 10 percent */
+ .drop_tolerance = 50, /* 50 percent */
+};
+
+struct test_var ft5_tvar = {
+ .wait_usec = 0,
+ .num_iterations = 15,
+ .num_ops = 10000,
+ .clk_freq = 0,
+ .dropped = ft5_dropped,
+ .enqueued = ft5_enqueued,
+ .sleep_sec = 0,
+};
+
+static struct test_config func_test5_config = {
+ .ifname = "functional test 5 interface",
+ .msg = "functional test 5 : use several queues (each with its own run-time data),\n"
+ " use several RED configurations (such that each configuration is shared by multiple queues),\n"
+ " increase average queue size to just below maximum threshold,\n"
+ " compare drop rate to drop probability,\n"
+ " (this is a larger scale version of functional test 2)\n\n",
+ .htxt = "queue "
+ "config "
+ "avg queue size "
+ "min threshold "
+ "max threshold "
+ "drop prob % "
+ "drop rate % "
+ "diff % "
+ "tolerance % "
+ "\n",
+ .tconfig = &ft5_tconfig,
+ .tqueue = &ft5_tqueue,
+ .tvar = &ft5_tvar,
+ .tlevel = ft5_tlevel,
+};
+
+static enum test_result func_test5(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ uint32_t j = 0;
+
+ printf("%s", tcfg->msg);
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+
+ printf("%s", tcfg->htxt);
+
+ for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+ rte_red_rt_data_init(&tcfg->tqueue->rdata[j]);
+ tcfg->tqueue->q[j] = 0;
+
+ if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j],
+ &tcfg->tqueue->q[j],
+ *tcfg->tlevel,
+ tcfg->tqueue->q_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+
+ if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j],
+ &tcfg->tqueue->q[j],
+ *tcfg->tlevel,
+ tcfg->tqueue->avg_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+ }
+
+ for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+ uint32_t avg = 0;
+ double drop_rate = 0.0;
+ double drop_prob = 0.0;
+ double diff = 0.0;
+
+ tcfg->tvar->dropped[j] = 0;
+ tcfg->tvar->enqueued[j] = 0;
+
+ enqueue_dequeue_func(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j],
+ &tcfg->tqueue->q[j],
+ tcfg->tvar->num_ops,
+ &tcfg->tvar->enqueued[j],
+ &tcfg->tvar->dropped[j]);
+
+ avg = rte_red_get_avg_int(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j]);
+ if (avg != *tcfg->tlevel)
+ result = FAIL;
+
+ drop_rate = calc_drop_rate(tcfg->tvar->enqueued[j],tcfg->tvar->dropped[j]);
+ drop_prob = calc_drop_prob(tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+ tcfg->tconfig->maxp_inv[tcfg->tqueue->qconfig[j]],
+ *tcfg->tlevel);
+ if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+ result = FAIL;
+
+ printf("%-15u%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+ j, tcfg->tqueue->qconfig[j], avg,
+ tcfg->tconfig->min_th, tcfg->tconfig->max_th,
+ drop_prob * 100.0, drop_rate * 100.0,
+ diff, (double)tcfg->tqueue->drop_tolerance);
+ }
+out:
+ return (result);
+}
+
+/**
+ * Test F6: functional test 6
+ */
+static uint32_t ft6_tlevel[] = {1022};
+static uint8_t ft6_wq_log2[] = {9, 8};
+static uint8_t ft6_maxp_inv[] = {10, 20};
+static struct rte_red_config ft6_config[2];
+static struct rte_red ft6_data[4];
+static uint32_t ft6_q[4];
+
+static struct test_rte_red_config ft6_tconfig = {
+ .rconfig = ft6_config,
+ .num_cfg = DIM(ft6_config),
+ .min_th = 32,
+ .max_th = 1023,
+ .wq_log2 = ft6_wq_log2,
+ .maxp_inv = ft6_maxp_inv,
+};
+
+static struct test_queue ft6_tqueue = {
+ .rdata = ft6_data,
+ .num_queues = DIM(ft6_data),
+ .qconfig = ft_qconfig,
+ .q = ft6_q,
+ .q_ramp_up = 1000000,
+ .avg_ramp_up = 1000000,
+ .avg_tolerance = 5, /* 10 percent */
+ .drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_config func_test6_config = {
+ .ifname = "functional test 6 interface",
+ .msg = "functional test 6 : use several queues (each with its own run-time data),\n"
+ " use several RED configurations (such that each configuration is sharte_red by multiple queues),\n"
+ " increase average queue size to target level,\n"
+ " dequeue all packets until queue is empty,\n"
+ " confirm that average queue size is computed correctly while queue is empty\n"
+ " (this is a larger scale version of functional test 3)\n\n",
+ .htxt = "queue "
+ "config "
+ "q avg before "
+ "q avg after "
+ "expected "
+ "difference % "
+ "tolerance % "
+ "result ""\n",
+ .tconfig = &ft6_tconfig,
+ .tqueue = &ft6_tqueue,
+ .tvar = &ft_tvar,
+ .tlevel = ft6_tlevel,
+};
+
+static enum test_result func_test6(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ uint32_t j = 0;
+
+ printf("%s", tcfg->msg);
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+ printf("%s", tcfg->htxt);
+
+ for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+ rte_red_rt_data_init(&tcfg->tqueue->rdata[j]);
+ tcfg->tqueue->q[j] = 0;
+
+ if (increase_actual_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j],
+ &tcfg->tqueue->q[j],
+ *tcfg->tlevel,
+ tcfg->tqueue->q_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+ if (increase_average_qsize(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j],
+ &tcfg->tqueue->q[j],
+ *tcfg->tlevel,
+ tcfg->tqueue->avg_ramp_up) != 0) {
+ result = FAIL;
+ goto out;
+ }
+ }
+ for (j = 0; j < tcfg->tqueue->num_queues; j++) {
+ double avg_before = 0;
+ double avg_after = 0;
+ double exp_avg = 0;
+ double diff = 0.0;
+
+ avg_before = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j]);
+
+ /**
+ * empty the queue
+ */
+ tcfg->tqueue->q[j] = 0;
+ rte_red_mark_queue_empty(&tcfg->tqueue->rdata[j], get_port_ts());
+ rte_delay_us(tcfg->tvar->wait_usec);
+
+ /**
+ * enqueue one packet to recalculate average queue size
+ */
+ if (rte_red_enqueue(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j],
+ tcfg->tqueue->q[j],
+ get_port_ts()) == 0) {
+ tcfg->tqueue->q[j]++;
+ } else {
+ printf("%s:%d: packet enqueued on empty queue was dropped\n", __func__, __LINE__);
+ result = FAIL;
+ }
+
+ exp_avg = calc_exp_avg_on_empty(avg_before,
+ (1 << tcfg->tconfig->wq_log2[tcfg->tqueue->qconfig[j]]),
+ tcfg->tvar->wait_usec);
+ avg_after = rte_red_get_avg_float(&tcfg->tconfig->rconfig[tcfg->tqueue->qconfig[j]],
+ &tcfg->tqueue->rdata[j]);
+ if (!check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+ result = FAIL;
+
+ printf("%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+ j, tcfg->tqueue->qconfig[j], avg_before, avg_after,
+ exp_avg, diff, (double)tcfg->tqueue->avg_tolerance,
+ diff <= tcfg->tqueue->avg_tolerance ? "pass" : "fail");
+ }
+out:
+ return (result);
+}
+
+/**
+ * setup default values for the performance test structures
+ */
+static struct rte_red_config pt_wrconfig[1];
+static struct rte_red pt_rtdata[1];
+static uint8_t pt_wq_log2[] = {9};
+static uint8_t pt_maxp_inv[] = {10};
+static uint32_t pt_qconfig[] = {0};
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+
+static struct test_rte_red_config pt_tconfig = {
+ .rconfig = pt_wrconfig,
+ .num_cfg = DIM(pt_wrconfig),
+ .wq_log2 = pt_wq_log2,
+ .min_th = 32,
+ .max_th = 128,
+ .maxp_inv = pt_maxp_inv,
+};
+
+static struct test_queue pt_tqueue = {
+ .rdata = pt_rtdata,
+ .num_queues = DIM(pt_rtdata),
+ .qconfig = pt_qconfig,
+ .q = pt_q,
+ .q_ramp_up = 1000000,
+ .avg_ramp_up = 1000000,
+ .avg_tolerance = 5, /* 10 percent */
+ .drop_tolerance = 50, /* 50 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ */
+static void enqueue_dequeue_perf(struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ uint32_t *q,
+ uint32_t num_ops,
+ uint32_t *enqueued,
+ uint32_t *dropped,
+ struct rdtsc_prof *prof)
+{
+ uint32_t i = 0;
+
+ for (i = 0; i < num_ops; i++) {
+ uint64_t ts = 0;
+ int ret = 0;
+ /**
+ * enqueue
+ */
+ ts = get_port_ts();
+ rdtsc_prof_start(prof);
+ ret = rte_red_enqueue(red_cfg, red, *q, ts );
+ rdtsc_prof_end(prof);
+ if (ret == 0)
+ (*enqueued)++;
+ else
+ (*dropped)++;
+ }
+}
+
+/**
+ * Setup test structures for tests P1, P2, P3
+ * performance tests 1, 2 and 3
+ */
+static uint32_t pt1_tlevel[] = {16};
+static uint32_t pt2_tlevel[] = {80};
+static uint32_t pt3_tlevel[] = {144};
+
+static struct test_var perf1_tvar = {
+ .wait_usec = 0,
+ .num_iterations = 15,
+ .num_ops = 50000000,
+ .clk_freq = 0,
+ .dropped = pt_dropped,
+ .enqueued = pt_enqueued,
+ .sleep_sec = 0
+};
+
+static struct test_config perf1_test1_config = {
+ .ifname = "performance test 1 interface",
+ .msg = "performance test 1 : use one RED configuration,\n"
+ " set actual and average queue sizes to level below min threshold,\n"
+ " measure enqueue performance\n\n",
+ .tconfig = &pt_tconfig,
+ .tqueue = &pt_tqueue,
+ .tvar = &perf1_tvar,
+ .tlevel = pt1_tlevel,
+};
+
+static struct test_config perf1_test2_config = {
+ .ifname = "performance test 2 interface",
+ .msg = "performance test 2 : use one RED configuration,\n"
+ " set actual and average queue sizes to level in between min and max thresholds,\n"
+ " measure enqueue performance\n\n",
+ .tconfig = &pt_tconfig,
+ .tqueue = &pt_tqueue,
+ .tvar = &perf1_tvar,
+ .tlevel = pt2_tlevel,
+};
+
+static struct test_config perf1_test3_config = {
+ .ifname = "performance test 3 interface",
+ .msg = "performance test 3 : use one RED configuration,\n"
+ " set actual and average queue sizes to level above max threshold,\n"
+ " measure enqueue performance\n\n",
+ .tconfig = &pt_tconfig,
+ .tqueue = &pt_tqueue,
+ .tvar = &perf1_tvar,
+ .tlevel = pt3_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ * This runs performance tests 1, 2 and 3
+ */
+static enum test_result perf1_test(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+ uint32_t total = 0;
+
+ printf("%s", tcfg->msg);
+
+ rdtsc_prof_init(&prof, "enqueue");
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+
+ /**
+ * set average queue size to target level
+ */
+ *tcfg->tqueue->q = *tcfg->tlevel;
+
+ /**
+ * initialize the rte_red run time data structure
+ */
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+ /**
+ * set the queue average
+ */
+ rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel);
+ if (rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata)
+ != *tcfg->tlevel) {
+ result = FAIL;
+ goto out;
+ }
+
+ enqueue_dequeue_perf(tcfg->tconfig->rconfig,
+ tcfg->tqueue->rdata,
+ tcfg->tqueue->q,
+ tcfg->tvar->num_ops,
+ tcfg->tvar->enqueued,
+ tcfg->tvar->dropped,
+ &prof);
+
+ total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+ printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total,
+ *tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+ *tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+ rdtsc_prof_print(&prof);
+out:
+ return (result);
+}
+
+/**
+ * Setup test structures for tests P4, P5, P6
+ * performance tests 4, 5 and 6
+ */
+static uint32_t pt4_tlevel[] = {16};
+static uint32_t pt5_tlevel[] = {80};
+static uint32_t pt6_tlevel[] = {144};
+
+static struct test_var perf2_tvar = {
+ .wait_usec = 500,
+ .num_iterations = 10000,
+ .num_ops = 10000,
+ .dropped = pt_dropped,
+ .enqueued = pt_enqueued,
+ .sleep_sec = 0
+};
+
+static struct test_config perf2_test4_config = {
+ .ifname = "performance test 4 interface",
+ .msg = "performance test 4 : use one RED configuration,\n"
+ " set actual and average queue sizes to level below min threshold,\n"
+ " dequeue all packets until queue is empty,\n"
+ " measure enqueue performance when queue is empty\n\n",
+ .htxt = "iteration "
+ "q avg before "
+ "q avg after "
+ "expected "
+ "difference % "
+ "tolerance % "
+ "result ""\n",
+ .tconfig = &pt_tconfig,
+ .tqueue = &pt_tqueue,
+ .tvar = &perf2_tvar,
+ .tlevel = pt4_tlevel,
+};
+
+static struct test_config perf2_test5_config = {
+ .ifname = "performance test 5 interface",
+ .msg = "performance test 5 : use one RED configuration,\n"
+ " set actual and average queue sizes to level in between min and max thresholds,\n"
+ " dequeue all packets until queue is empty,\n"
+ " measure enqueue performance when queue is empty\n\n",
+ .htxt = "iteration "
+ "q avg before "
+ "q avg after "
+ "expected "
+ "difference "
+ "tolerance "
+ "result ""\n",
+ .tconfig = &pt_tconfig,
+ .tqueue = &pt_tqueue,
+ .tvar = &perf2_tvar,
+ .tlevel = pt5_tlevel,
+};
+
+static struct test_config perf2_test6_config = {
+ .ifname = "performance test 6 interface",
+ .msg = "performance test 6 : use one RED configuration,\n"
+ " set actual and average queue sizes to level above max threshold,\n"
+ " dequeue all packets until queue is empty,\n"
+ " measure enqueue performance when queue is empty\n\n",
+ .htxt = "iteration "
+ "q avg before "
+ "q avg after "
+ "expected "
+ "difference % "
+ "tolerance % "
+ "result ""\n",
+ .tconfig = &pt_tconfig,
+ .tqueue = &pt_tqueue,
+ .tvar = &perf2_tvar,
+ .tlevel = pt6_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance when the
+ * queue is empty. This runs performance tests 4, 5 and 6
+ */
+static enum test_result perf2_test(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+ uint32_t total = 0;
+ uint32_t i = 0;
+
+ printf("%s", tcfg->msg);
+
+ rdtsc_prof_init(&prof, "enqueue");
+
+ if (test_rte_red_init(tcfg) != PASS) {
+ result = FAIL;
+ goto out;
+ }
+
+ printf("%s", tcfg->htxt);
+
+ for (i = 0; i < tcfg->tvar->num_iterations; i++) {
+ uint32_t count = 0;
+ uint64_t ts = 0;
+ double avg_before = 0;
+ int ret = 0;
+
+ /**
+ * set average queue size to target level
+ */
+ *tcfg->tqueue->q = *tcfg->tlevel;
+ count = (*tcfg->tqueue->rdata).count;
+
+ /**
+ * initialize the rte_red run time data structure
+ */
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+ (*tcfg->tqueue->rdata).count = count;
+
+ /**
+ * set the queue average
+ */
+ rte_red_set_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata, *tcfg->tlevel);
+ avg_before = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+ if ((avg_before < *tcfg->tlevel) || (avg_before > *tcfg->tlevel)) {
+ result = FAIL;
+ goto out;
+ }
+
+ /**
+ * empty the queue
+ */
+ *tcfg->tqueue->q = 0;
+ rte_red_mark_queue_empty(tcfg->tqueue->rdata, get_port_ts());
+
+ /**
+ * wait for specified period of time
+ */
+ rte_delay_us(tcfg->tvar->wait_usec);
+
+ /**
+ * measure performance of enqueue operation while queue is empty
+ */
+ ts = get_port_ts();
+ rdtsc_prof_start(&prof);
+ ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+ *tcfg->tqueue->q, ts );
+ rdtsc_prof_end(&prof);
+
+ /**
+ * gather enqueued/dropped statistics
+ */
+ if (ret == 0)
+ (*tcfg->tvar->enqueued)++;
+ else
+ (*tcfg->tvar->dropped)++;
+
+ /**
+ * on first and last iteration, confirm that
+ * average queue size was computed correctly
+ */
+ if ((i == 0) || (i == tcfg->tvar->num_iterations - 1)) {
+ double avg_after = 0;
+ double exp_avg = 0;
+ double diff = 0.0;
+ int ok = 0;
+
+ avg_after = rte_red_get_avg_float(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+ exp_avg = calc_exp_avg_on_empty(avg_before,
+ (1 << *tcfg->tconfig->wq_log2),
+ tcfg->tvar->wait_usec);
+ if (check_avg(&diff, avg_after, exp_avg, (double)tcfg->tqueue->avg_tolerance))
+ ok = 1;
+ printf("%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15.4lf%-15s\n",
+ i, avg_before, avg_after, exp_avg, diff,
+ (double)tcfg->tqueue->avg_tolerance, ok ? "pass" : "fail");
+ if (!ok) {
+ result = FAIL;
+ goto out;
+ }
+ }
+ }
+ total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+ printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n", total,
+ *tcfg->tvar->enqueued, ((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+ *tcfg->tvar->dropped, ((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+ rdtsc_prof_print(&prof);
+out:
+ return (result);
+}
+
+/**
+ * setup default values for overflow test structures
+ */
+static uint32_t avg_max = 0;
+static uint32_t avg_max_bits = 0;
+
+static struct rte_red_config ovfl_wrconfig[1];
+static struct rte_red ovfl_rtdata[1];
+static uint8_t ovfl_maxp_inv[] = {10};
+static uint32_t ovfl_qconfig[] = {0, 0, 1, 1};
+static uint32_t ovfl_q[] ={0};
+static uint32_t ovfl_dropped[] ={0};
+static uint32_t ovfl_enqueued[] ={0};
+static uint32_t ovfl_tlevel[] = {1023};
+static uint8_t ovfl_wq_log2[] = {12};
+
+static struct test_rte_red_config ovfl_tconfig = {
+ .rconfig = ovfl_wrconfig,
+ .num_cfg = DIM(ovfl_wrconfig),
+ .wq_log2 = ovfl_wq_log2,
+ .min_th = 32,
+ .max_th = 1023,
+ .maxp_inv = ovfl_maxp_inv,
+};
+
+static struct test_queue ovfl_tqueue = {
+ .rdata = ovfl_rtdata,
+ .num_queues = DIM(ovfl_rtdata),
+ .qconfig = ovfl_qconfig,
+ .q = ovfl_q,
+ .q_ramp_up = 1000000,
+ .avg_ramp_up = 1000000,
+ .avg_tolerance = 5, /* 10 percent */
+ .drop_tolerance = 50, /* 50 percent */
+};
+
+static struct test_var ovfl_tvar = {
+ .wait_usec = 10000,
+ .num_iterations = 1,
+ .num_ops = 10000,
+ .clk_freq = 0,
+ .dropped = ovfl_dropped,
+ .enqueued = ovfl_enqueued,
+ .sleep_sec = 0
+};
+
+static void ovfl_check_avg(uint32_t avg)
+{
+ if (avg > avg_max) {
+ double avg_log = 0;
+ uint32_t bits = 0;
+ avg_max = avg;
+ avg_log = log(((double)avg_max));
+ avg_log = avg_log / log(2.0);
+ bits = (uint32_t)ceil(avg_log);
+ if (bits > avg_max_bits)
+ avg_max_bits = bits;
+ }
+}
+
+static struct test_config ovfl_test1_config = {
+ .ifname = "queue avergage overflow test interface",
+ .msg = "overflow test 1 : use one RED configuration,\n"
+ " increase average queue size to target level,\n"
+ " check maximum number of bits requirte_red to represent avg_s\n\n",
+ .htxt = "avg queue size "
+ "wq_log2 "
+ "fraction bits "
+ "max queue avg "
+ "num bits "
+ "enqueued "
+ "dropped "
+ "drop prob % "
+ "drop rate % "
+ "\n",
+ .tconfig = &ovfl_tconfig,
+ .tqueue = &ovfl_tqueue,
+ .tvar = &ovfl_tvar,
+ .tlevel = ovfl_tlevel,
+};
+
+static enum test_result ovfl_test1(struct test_config *tcfg)
+{
+ enum test_result result = PASS;
+ uint32_t avg = 0;
+ uint32_t i = 0;
+ double drop_rate = 0.0;
+ double drop_prob = 0.0;
+ double diff = 0.0;
+ int ret = 0;
+
+ printf("%s", tcfg->msg);
+
+ if (test_rte_red_init(tcfg) != PASS) {
+
+ result = FAIL;
+ goto out;
+ }
+
+ /**
+ * reset rte_red run-time data
+ */
+ rte_red_rt_data_init(tcfg->tqueue->rdata);
+
+ /**
+ * increase actual queue size
+ */
+ for (i = 0; i < tcfg->tqueue->q_ramp_up; i++) {
+ ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+ *tcfg->tqueue->q, get_port_ts());
+
+ if (ret == 0) {
+ if (++(*tcfg->tqueue->q) >= *tcfg->tlevel)
+ break;
+ }
+ }
+
+ /**
+ * enqueue
+ */
+ for (i = 0; i < tcfg->tqueue->avg_ramp_up; i++) {
+ ret = rte_red_enqueue(tcfg->tconfig->rconfig, tcfg->tqueue->rdata,
+ *tcfg->tqueue->q, get_port_ts());
+ ovfl_check_avg((*tcfg->tqueue->rdata).avg);
+ avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+ if (avg == *tcfg->tlevel) {
+ if (ret == 0)
+ (*tcfg->tvar->enqueued)++;
+ else
+ (*tcfg->tvar->dropped)++;
+ }
+ }
+
+ /**
+ * check if target average queue size has been reached
+ */
+ avg = rte_red_get_avg_int(tcfg->tconfig->rconfig, tcfg->tqueue->rdata);
+ if (avg != *tcfg->tlevel) {
+ result = FAIL;
+ goto out;
+ }
+
+ /**
+ * check drop rate against drop probability
+ */
+ drop_rate = calc_drop_rate(*tcfg->tvar->enqueued, *tcfg->tvar->dropped);
+ drop_prob = calc_drop_prob(tcfg->tconfig->min_th,
+ tcfg->tconfig->max_th,
+ *tcfg->tconfig->maxp_inv,
+ *tcfg->tlevel);
+ if (!check_drop_rate(&diff, drop_rate, drop_prob, (double)tcfg->tqueue->drop_tolerance))
+ result = FAIL;
+
+ printf("%s", tcfg->htxt);
+
+ printf("%-16u%-9u%-15u0x%08x %-10u%-10u%-10u%-13.2lf%-13.2lf\n",
+ avg, *tcfg->tconfig->wq_log2, RTE_RED_SCALING,
+ avg_max, avg_max_bits,
+ *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+ drop_prob * 100.0, drop_rate * 100.0);
+out:
+ return (result);
+}
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_tests[] = {
+ { &func_test1_config, func_test1 },
+ { &func_test2_config, func_test2 },
+ { &func_test3_config, func_test3 },
+ { &func_test4_config, func_test4 },
+ { &func_test5_config, func_test5 },
+ { &func_test6_config, func_test6 },
+ { &ovfl_test1_config, ovfl_test1 },
+};
+
+struct tests perf_tests[] = {
+ { &perf1_test1_config, perf1_test },
+ { &perf1_test2_config, perf1_test },
+ { &perf1_test3_config, perf1_test },
+ { &perf2_test4_config, perf2_test },
+ { &perf2_test5_config, perf2_test },
+ { &perf2_test6_config, perf2_test },
+};
+
+/**
+ * function to execute the required_red tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count, uint32_t *num_tests, uint32_t *num_pass)
+{
+ enum test_result result = PASS;
+ uint32_t i = 0;
+
+ for (i = 0; i < test_count; i++) {
+ printf("\n--------------------------------------------------------------------------------\n");
+ result = test_type[i].testfn(test_type[i].testcfg);
+ (*num_tests)++;
+ if (result == PASS) {
+ (*num_pass)++;
+ printf("-------------------------------------<pass>-------------------------------------\n");
+ } else {
+ printf("-------------------------------------<fail>-------------------------------------\n");
+ }
+ }
+ return;
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized RED
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+ struct rte_red_config config;
+
+ if (rte_red_rt_data_init(NULL) == 0) {
+ printf("rte_red_rt_data_init should have failed!\n");
+ return -1;
+ }
+
+ if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+ printf("rte_red_config_init should have failed!\n");
+ return -1;
+ }
+
+ if (rte_red_rt_data_init(NULL) == 0) {
+ printf("rte_red_rt_data_init should have failed!\n");
+ return -1;
+ }
+
+ /* NULL config */
+ if (rte_red_config_init(NULL, 0, 0, 0, 0) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+ /* min_treshold == max_treshold */
+ if (rte_red_config_init(&config, 0, 1, 1, 0) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+ /* min_treshold > max_treshold */
+ if (rte_red_config_init(&config, 0, 2, 1, 0) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+ /* wq_log2 > RTE_RED_WQ_LOG2_MAX */
+ if (rte_red_config_init(&config,
+ RTE_RED_WQ_LOG2_MAX + 1, 1, 2, 0) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+ /* wq_log2 < RTE_RED_WQ_LOG2_MIN */
+ if (rte_red_config_init(&config,
+ RTE_RED_WQ_LOG2_MIN - 1, 1, 2, 0) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+ /* maxp_inv > RTE_RED_MAXP_INV_MAX */
+ if (rte_red_config_init(&config,
+ RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MAX + 1) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+ /* maxp_inv < RTE_RED_MAXP_INV_MIN */
+ if (rte_red_config_init(&config,
+ RTE_RED_WQ_LOG2_MIN, 1, 2, RTE_RED_MAXP_INV_MIN - 1) == 0) {
+ printf("%i: rte_red_config_init should have failed!\n", __LINE__);
+ return -1;
+ }
+
+ return 0;
+}
+
+int test_red(void)
+{
+ uint32_t num_tests = 0;
+ uint32_t num_pass = 0;
+ int ret = 0;
+
+ if (test_invalid_parameters() < 0)
+ return -1;
+
+ run_tests(func_tests, DIM(func_tests), &num_tests, &num_pass);
+ run_tests(perf_tests, DIM(perf_tests), &num_tests, &num_pass);
+
+ if (num_pass == num_tests) {
+ printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+ ret = 0;
+ } else {
+ printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass, num_tests - num_pass);
+ ret = -1;
+ }
+ return (ret);
+}
+
+#else
+
+int
+test_red(void)
+{
+ printf("The SCHED library is not included in this build\n");
+ return 0;
+}
+
+#endif
diff --git a/app/test/test_sched.c b/app/test/test_sched.c
new file mode 100755
index 0000000..a0efa52
--- /dev/null
+++ b/app/test/test_sched.c
@@ -0,0 +1,244 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <cmdline_parse.h>
+
+#include "test.h"
+
+#if defined(RTE_LIBRTE_SCHED) && defined(RTE_ARCH_X86_64)
+
+#include <rte_cycles.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+#include <rte_sched.h>
+
+
+#define VERIFY(exp,fmt,args...) \
+ if (!(exp)) { \
+ printf(fmt, ##args); \
+ return -1; \
+ }
+
+
+#define SUBPORT 0
+#define PIPE 1
+#define TC 2
+#define QUEUE 3
+
+static struct rte_sched_subport_params subport_param[] = {
+ {
+ .tb_rate = 1250000000,
+ .tb_size = 1000000,
+
+ .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+ .tc_period = 10,
+ },
+};
+
+static struct rte_sched_pipe_params pipe_profile[] = {
+ { /* Profile #0 */
+ .tb_rate = 305175,
+ .tb_size = 1000000,
+
+ .tc_rate = {305175, 305175, 305175, 305175},
+ .tc_period = 40,
+
+ .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ },
+};
+
+static struct rte_sched_port_params port_param = {
+ .name = "port_0",
+ .socket = 0, /* computed */
+ .rate = 0, /* computed */
+ .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+ .n_subports_per_port = 1,
+ .n_pipes_per_subport = 4096,
+ .qsize = {64, 64, 64, 64},
+ .pipe_profiles = pipe_profile,
+ .n_pipe_profiles = 1,
+};
+
+#define NB_MBUF 32
+#define MAX_PACKET_SZ 2048
+#define MBUF_SZ (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define PKT_BURST_SZ 32
+#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
+#define SOCKET 0
+
+
+static struct rte_mempool *
+create_mempool(void)
+{
+ struct rte_mempool * mp;
+
+ mp = rte_mempool_lookup("test_sched");
+ if (!mp)
+ mp = rte_mempool_create("test_sched",
+ NB_MBUF,
+ MBUF_SZ,
+ MEMPOOL_CACHE_SZ,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init,
+ NULL,
+ rte_pktmbuf_init,
+ NULL,
+ SOCKET,
+ 0);
+
+ return mp;
+}
+
+static void
+prepare_pkt(struct rte_mbuf *mbuf)
+{
+ struct ether_hdr *eth_hdr;
+ struct vlan_hdr *vlan1, *vlan2;
+ struct ipv4_hdr *ip_hdr;
+
+ /* Simulate a classifier */
+ eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+ vlan1 = (struct vlan_hdr *)(&eth_hdr->ether_type );
+ vlan2 = (struct vlan_hdr *)((uintptr_t)&eth_hdr->ether_type + sizeof(struct vlan_hdr));
+ eth_hdr = (struct ether_hdr *)((uintptr_t)&eth_hdr->ether_type + 2 *sizeof(struct vlan_hdr));
+ ip_hdr = (struct ipv4_hdr *)((uintptr_t)eth_hdr + sizeof(eth_hdr->ether_type));
+
+ vlan1->vlan_tci = rte_cpu_to_be_16(SUBPORT);
+ vlan2->vlan_tci = rte_cpu_to_be_16(PIPE);
+ eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE);
+
+
+ rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE, e_RTE_METER_YELLOW);
+
+ /* 64 byte packet */
+ mbuf->pkt.pkt_len = 60;
+ mbuf->pkt.data_len = 60;
+}
+
+
+/**
+ * test main entrance for library sched
+ */
+int
+test_sched(void)
+{
+ struct rte_mempool *mp = NULL;
+ struct rte_sched_port *port = NULL;
+ uint32_t pipe;
+ struct rte_mbuf *in_mbufs[10];
+ struct rte_mbuf *out_mbufs[10];
+ int i;
+
+ int err;
+
+ mp = create_mempool();
+
+ port_param.socket = 0;
+ port_param.rate = (uint64_t) 10000 * 1000 * 1000 / 8;
+ port_param.name = "port_0";
+
+ port = rte_sched_port_config(&port_param);
+ VERIFY(port != NULL, "Error config sched port\n");
+
+
+ err = rte_sched_subport_config(port, SUBPORT, subport_param);
+ VERIFY(err == 0, "Error config sched, err=%d\n", err);
+
+ for (pipe = 0; pipe < port_param.n_pipes_per_subport; pipe ++) {
+ err = rte_sched_pipe_config(port, SUBPORT, pipe, 0);
+ VERIFY(err == 0, "Error config sched pipe %u, err=%d\n", pipe, err);
+ }
+
+ for (i = 0; i < 10; i++) {
+ in_mbufs[i] = rte_pktmbuf_alloc(mp);
+ prepare_pkt(in_mbufs[i]);
+ }
+
+
+ err = rte_sched_port_enqueue(port, in_mbufs, 10);
+ VERIFY(err == 10, "Wrong enqueue, err=%d\n", err);
+
+ err = rte_sched_port_dequeue(port, out_mbufs, 10);
+ VERIFY(err == 10, "Wrong dequeue, err=%d\n", err);
+
+ for (i = 0; i < 10; i++) {
+ enum rte_meter_color color;
+ uint32_t subport, traffic_class, queue;
+
+ color = rte_sched_port_pkt_read_color(out_mbufs[i]);
+ VERIFY(color == e_RTE_METER_YELLOW, "Wrong color\n");
+
+ rte_sched_port_pkt_read_tree_path(out_mbufs[i],
+ &subport, &pipe, &traffic_class, &queue);
+
+ VERIFY(subport == SUBPORT, "Wrong subport\n");
+ VERIFY(pipe == PIPE, "Wrong pipe\n");
+ VERIFY(traffic_class == TC, "Wrong traffic_class\n");
+ VERIFY(queue == QUEUE, "Wrong queue\n");
+
+ }
+
+
+ struct rte_sched_subport_stats subport_stats;
+ uint32_t tc_ov;
+ rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov);
+ //VERIFY(subport_stats.n_pkts_tc[TC-1] == 10, "Wrong subport stats\n");
+
+ struct rte_sched_queue_stats queue_stats;
+ uint16_t qlen;
+ rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen);
+ //VERIFY(queue_stats.n_pkts == 10, "Wrong queue stats\n");
+
+ rte_sched_port_free(port);
+
+ return 0;
+}
+
+#else /* RTE_LIBRTE_SCHED */
+
+int
+test_sched(void)
+{
+ printf("The Scheduler library is not included in this build\n");
+ return 0;
+}
+#endif /* RTE_LIBRTE_SCHED */
diff --git a/config/defconfig_i686-default-linuxapp-gcc b/config/defconfig_i686-default-linuxapp-gcc
index 5960e85..a63d37a 100644
--- a/config/defconfig_i686-default-linuxapp-gcc
+++ b/config/defconfig_i686-default-linuxapp-gcc
@@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y
CONFIG_RTE_LIBRTE_METER=y
#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
+#
# Compile librte_kni
#
CONFIG_RTE_LIBRTE_KNI=y
diff --git a/config/defconfig_i686-default-linuxapp-icc b/config/defconfig_i686-default-linuxapp-icc
index 6c6a59d..cf86ba5 100644
--- a/config/defconfig_i686-default-linuxapp-icc
+++ b/config/defconfig_i686-default-linuxapp-icc
@@ -235,6 +235,16 @@ CONFIG_RTE_LIBRTE_NET=y
CONFIG_RTE_LIBRTE_METER=y
#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
+#
# Compile librte_kni
#
CONFIG_RTE_LIBRTE_KNI=y
diff --git a/config/defconfig_x86_64-default-linuxapp-gcc b/config/defconfig_x86_64-default-linuxapp-gcc
index 1dcc8c6..b5d3362 100644
--- a/config/defconfig_x86_64-default-linuxapp-gcc
+++ b/config/defconfig_x86_64-default-linuxapp-gcc
@@ -235,6 +235,17 @@ CONFIG_RTE_LIBRTE_NET=y
CONFIG_RTE_LIBRTE_METER=y
#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
+#
+# Compile the test application
# Compile librte_kni
#
CONFIG_RTE_LIBRTE_KNI=y
diff --git a/config/defconfig_x86_64-default-linuxapp-icc b/config/defconfig_x86_64-default-linuxapp-icc
index 7053a10..60f10af 100644
--- a/config/defconfig_x86_64-default-linuxapp-icc
+++ b/config/defconfig_x86_64-default-linuxapp-icc
@@ -235,6 +235,16 @@ CONFIG_RTE_LIBRTE_NET=y
CONFIG_RTE_LIBRTE_METER=y
#
+# Compile librte_sched
+#
+CONFIG_RTE_LIBRTE_SCHED=y
+CONFIG_RTE_SCHED_RED=n
+CONFIG_RTE_SCHED_COLLECT_STATS=n
+CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
+CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
+CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
+
+#
# Compile librte_kni
#
CONFIG_RTE_LIBRTE_KNI=y
diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile
new file mode 100755
index 0000000..08f4d19
--- /dev/null
+++ b/examples/qos_sched/Makefile
@@ -0,0 +1,58 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-default-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# binary name
+APP = qos_sched
+
+# all source are stored in SRCS-y
+SRCS-y := main.c args.c init.c app_thread.c cfg_file.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lrte_sched
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
new file mode 100755
index 0000000..afce5ef
--- /dev/null
+++ b/examples/qos_sched/app_thread.c
@@ -0,0 +1,302 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_sched.h>
+
+#include "main.h"
+
+/*
+ * QoS parameters are encoded as follows:
+ * Outer VLAN ID defines subport
+ * Inner VLAN ID defines pipe
+ * Destination IP 0.0.XXX.0 defines traffic class
+ * Destination IP host (0.0.0.XXX) defines queue
+ * Values below define offset to each field from start of frame
+ */
+#define SUBPORT_OFFSET 7
+#define PIPE_OFFSET 9
+#define TC_OFFSET 20
+#define QUEUE_OFFSET 20
+#define COLOR_OFFSET 19
+
+static inline int
+get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe,
+ uint32_t *traffic_class, uint32_t *queue, uint32_t *color)
+{
+ uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *);
+
+ *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) &
+ (port_params.n_subports_per_port - 1); /* Outer VLAN ID*/
+ *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) &
+ (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */
+ *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) &
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */
+ *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) &
+ (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */
+ *color = pdata[COLOR_OFFSET] & 0x03; /* Destination IP */
+
+ return 0;
+}
+
+void
+app_rx_thread(struct thread_conf **confs)
+{
+ uint32_t i, nb_rx;
+ struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned;
+ struct thread_conf *conf;
+ int conf_idx = 0;
+
+ uint32_t subport;
+ uint32_t pipe;
+ uint32_t traffic_class;
+ uint32_t queue;
+ uint32_t color;
+
+ while ((conf = confs[conf_idx])) {
+ nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs,
+ burst_conf.rx_burst);
+
+ if (likely(nb_rx != 0)) {
+ APP_STATS_ADD(conf->stat.nb_rx, nb_rx);
+
+ for(i = 0; i < nb_rx; i++) {
+ get_pkt_sched(rx_mbufs[i],
+ &subport, &pipe, &traffic_class, &queue, &color);
+ rte_sched_port_pkt_write(rx_mbufs[i], subport, pipe,
+ traffic_class, queue, (enum rte_meter_color) color);
+ }
+
+ if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring,
+ (void **)rx_mbufs, nb_rx) != 0)) {
+ for(i = 0; i < nb_rx; i++) {
+ rte_pktmbuf_free(rx_mbufs[i]);
+
+ APP_STATS_ADD(conf->stat.nb_drop, 1);
+ }
+ }
+ }
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
+
+/* Send the packet to an output interface
+ * For performance reason function returns number of packets dropped, not sent,
+ * so 0 means that all packets were sent successfully
+ */
+
+static inline void
+app_send_burst(struct thread_conf *qconf)
+{
+ struct rte_mbuf **mbufs;
+ uint32_t n, ret;
+
+ mbufs = (struct rte_mbuf **)qconf->m_table;
+ n = qconf->n_mbufs;
+
+ do {
+ ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n);
+ if (unlikely(ret < n)) { /* we cannot drop the packets, so re-send */
+ /* update number of packets to be sent */
+ n -= ret;
+ mbufs = (struct rte_mbuf **)&mbufs[ret];
+ /* limit number of retries to avoid endless loop */
+ /* reset retry counter if some packets were sent */
+ if (likely(ret != 0)) {
+ continue;
+ }
+ }
+ } while (ret != n);
+}
+
+
+/* Send the packet to an output interface */
+static void
+app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt)
+{
+ uint32_t i, len;
+
+ len = qconf->n_mbufs;
+ for(i = 0; i < nb_pkt; i++) {
+ qconf->m_table[len] = mbufs[i];
+ len++;
+ /* enough pkts to be sent */
+ if (unlikely(len == burst_conf.tx_burst)) {
+ qconf->n_mbufs = len;
+ app_send_burst(qconf);
+ len = 0;
+ }
+ }
+
+ qconf->n_mbufs = len;
+}
+
+void
+app_tx_thread(struct thread_conf **confs)
+{
+ struct rte_mbuf *mbufs[burst_conf.qos_dequeue];
+ struct thread_conf *conf;
+ int conf_idx = 0;
+ int retval;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ while ((conf = confs[conf_idx])) {
+ retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs,
+ burst_conf.qos_dequeue);
+ if (likely(retval == 0)) {
+ app_send_packets(conf, mbufs, burst_conf.qos_dequeue);
+
+ conf->counter = 0; /* reset empty read loop counter */
+ }
+
+ conf->counter++;
+
+ /* drain ring and TX queues */
+ if (unlikely(conf->counter > drain_tsc)) {
+ /* now check is there any packets left to be transmitted */
+ if (conf->n_mbufs != 0) {
+ app_send_burst(conf);
+
+ conf->n_mbufs = 0;
+ }
+ conf->counter = 0;
+ }
+
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
+void
+app_worker_thread(struct thread_conf **confs)
+{
+ struct rte_mbuf *mbufs[burst_conf.ring_burst];
+ struct thread_conf *conf;
+ int conf_idx = 0;
+
+ while ((conf = confs[conf_idx])) {
+ uint32_t nb_pkt;
+ int retval;
+
+ /* Read packet from the ring */
+ retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
+ burst_conf.ring_burst);
+ if (likely(retval == 0)) {
+ int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
+ burst_conf.ring_burst);
+
+ APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
+ APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
+ }
+
+ nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
+ burst_conf.qos_dequeue);
+ if (likely(nb_pkt > 0))
+ while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void **)mbufs, nb_pkt) != 0);
+
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
+void
+app_mixed_thread(struct thread_conf **confs)
+{
+ struct rte_mbuf *mbufs[burst_conf.ring_burst];
+ struct thread_conf *conf;
+ int conf_idx = 0;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ while ((conf = confs[conf_idx])) {
+ uint32_t nb_pkt;
+ int retval;
+
+ /* Read packet from the ring */
+ retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
+ burst_conf.ring_burst);
+ if (likely(retval == 0)) {
+ int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
+ burst_conf.ring_burst);
+
+ APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
+ APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
+ }
+
+
+ nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
+ burst_conf.qos_dequeue);
+ if (likely(nb_pkt > 0)) {
+ app_send_packets(conf, mbufs, nb_pkt);
+
+ conf->counter = 0; /* reset empty read loop counter */
+ }
+
+ conf->counter++;
+
+ /* drain ring and TX queues */
+ if (unlikely(conf->counter > drain_tsc)) {
+
+ /* now check is there any packets left to be transmitted */
+ if (conf->n_mbufs != 0) {
+ app_send_burst(conf);
+
+ conf->n_mbufs = 0;
+ }
+ conf->counter = 0;
+ }
+
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
new file mode 100755
index 0000000..c9cfdb2
--- /dev/null
+++ b/examples/qos_sched/args.c
@@ -0,0 +1,467 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <locale.h>
+#include <unistd.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+
+#define APP_NAME "qos_sched"
+#define MAX_OPT_VALUES 8
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/"
+
+static uint32_t app_master_core = 1;
+static uint32_t app_numa_mask;
+static uint64_t app_used_core_mask = 0;
+static uint64_t app_used_port_mask = 0;
+static uint64_t app_used_rx_port_mask = 0;
+static uint64_t app_used_tx_port_mask = 0;
+
+
+static const char usage[] =
+ " \n"
+ " %s <APP PARAMS> \n"
+ " \n"
+ "Application mandatory parameters: \n"
+ " --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n"
+ " multiple pfc can be configured in command line \n"
+ " \n"
+ "Application optional parameters: \n"
+ " --mst I : master core index (default value is %u) \n"
+ " --rsz \"A, B, C\" : Ring sizes \n"
+ " A = Size (in number of buffer descriptors) of each of the NIC RX \n"
+ " rings read by the I/O RX lcores (default value is %u) \n"
+ " B = Size (in number of elements) of each of the SW rings used by the\n"
+ " I/O RX lcores to send packets to worker lcores (default value is\n"
+ " %u) \n"
+ " C = Size (in number of buffer descriptors) of each of the NIC TX \n"
+ " rings written by worker lcores (default value is %u) \n"
+ " --bsz \"A, B, C, D\": Burst sizes \n"
+ " A = I/O RX lcore read burst size from NIC RX (default value is %u) \n"
+ " B = I/O RX lcore write burst size to output SW rings, \n"
+ " Worker lcore read burst size from input SW rings, \n"
+ " QoS enqueue size (default value is %u) \n"
+ " C = QoS dequeue size (default value is %u) \n"
+ " D = Worker lcore write burst size to NIC TX (default value is %u) \n"
+ " --rth \"A, B, C\" : RX queue threshold parameters \n"
+ " A = RX prefetch threshold (default value is %u) \n"
+ " B = RX host threshold (default value is %u) \n"
+ " C = RX write-back threshold (default value is %u) \n"
+ " --tth \"A, B, C\" : TX queue threshold parameters \n"
+ " A = TX prefetch threshold (default value is %u) \n"
+ " B = TX host threshold (default value is %u) \n"
+ " C = TX write-back threshold (default value is %u) \n"
+ " --cfg FILE : profile configuration to load \n"
+;
+
+/* display usage */
+static void
+app_usage(const char *prgname)
+{
+ printf(usage, prgname, app_master_core,
+ APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT,
+ MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE, MAX_PKT_TX_BURST,
+ RX_PTHRESH, RX_HTHRESH, RX_WTHRESH,
+ TX_PTHRESH, TX_HTHRESH, TX_WTHRESH
+ );
+}
+
+static inline int str_is(const char *str, const char *is)
+{
+ return (strcmp(str, is) == 0);
+}
+
+/* returns core mask used by DPDK */
+static uint64_t
+app_eal_core_mask(void)
+{
+ uint32_t i;
+ uint64_t cm = 0;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (cfg->lcore_role[i] == ROLE_RTE)
+ cm |= (1ULL << i);
+ }
+
+ cm |= (1ULL << cfg->master_lcore);
+
+ return cm;
+}
+
+
+/* returns total number of cores presented in a system */
+static uint32_t
+app_cpu_core_count(void)
+{
+ int i, len;
+ char path[PATH_MAX];
+ uint32_t ncores = 0;
+
+ for(i = 0; i < RTE_MAX_LCORE; i++) {
+ len = rte_snprintf(path, sizeof(path), SYS_CPU_DIR, i);
+ if (len <= 0 || (unsigned)len >= sizeof(path))
+ continue;
+
+ if (access(path, F_OK) == 0)
+ ncores++;
+ }
+
+ return ncores;
+}
+
+/* returns:
+ number of values parsed
+ -1 in case of error
+*/
+static int
+app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals)
+{
+ char *string;
+ uint32_t i, n_tokens;
+ char *tokens[MAX_OPT_VALUES];
+
+ if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES)
+ return -1;
+
+ /* duplicate configuration string before splitting it to tokens */
+ string = strdup(conf_str);
+ if (string == NULL)
+ return -1;
+
+ n_tokens = rte_strsplit(string, strnlen(string, 32), tokens, n_vals, separator);
+
+ for(i = 0; i < n_tokens; i++) {
+ opt_vals[i] = (uint32_t)atol(tokens[i]);
+ }
+
+ free(string);
+
+ return n_tokens;
+}
+
+static int
+app_parse_ring_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[3];
+
+ ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+ if (ret != 3)
+ return ret;
+
+ ring_conf.rx_size = vals[0];
+ ring_conf.ring_size = vals[1];
+ ring_conf.tx_size = vals[2];
+
+ return 0;
+}
+
+static int
+app_parse_rth_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[3];
+
+ ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+ if (ret != 3)
+ return ret;
+
+ rx_thresh.pthresh = (uint8_t)vals[0];
+ rx_thresh.hthresh = (uint8_t)vals[1];
+ rx_thresh.wthresh = (uint8_t)vals[2];
+
+ return 0;
+}
+
+static int
+app_parse_tth_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[3];
+
+ ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+ if (ret != 3)
+ return ret;
+
+ tx_thresh.pthresh = (uint8_t)vals[0];
+ tx_thresh.hthresh = (uint8_t)vals[1];
+ tx_thresh.wthresh = (uint8_t)vals[2];
+
+ return 0;
+}
+
+static int
+app_parse_flow_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[5];
+ struct flow_conf *pconf;
+ uint64_t mask;
+
+ ret = app_parse_opt_vals(conf_str, ',', 6, vals);
+ if (ret < 4 || ret > 5)
+ return ret;
+
+ pconf = &qos_conf[nb_pfc];
+
+ pconf->rx_port = (uint8_t)vals[0];
+ pconf->tx_port = (uint8_t)vals[1];
+ pconf->rx_core = (uint8_t)vals[2];
+ pconf->wt_core = (uint8_t)vals[3];
+ if (ret == 5)
+ pconf->tx_core = (uint8_t)vals[4];
+ else
+ pconf->tx_core = pconf->wt_core;
+
+ if (pconf->rx_core == pconf->wt_core) {
+ RTE_LOG(ERR, APP, "pfc %u: rx thread and worker thread cannot share same core\n", nb_pfc);
+ return -1;
+ }
+
+ if (pconf->rx_port >= RTE_MAX_ETHPORTS) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid rx port %hu index\n", nb_pfc, pconf->rx_port);
+ return -1;
+ }
+ if (pconf->tx_port >= RTE_MAX_ETHPORTS) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid tx port %hu index\n", nb_pfc, pconf->rx_port);
+ return -1;
+ }
+
+ mask = 1lu << pconf->rx_port;
+ if (app_used_rx_port_mask & mask) {
+ RTE_LOG(ERR, APP, "pfc %u: rx port %hu is used already\n", nb_pfc, pconf->rx_port);
+ return -1;
+ }
+ app_used_rx_port_mask |= mask;
+ app_used_port_mask |= mask;
+
+ mask = 1lu << pconf->tx_port;
+ if (app_used_tx_port_mask & mask) {
+ RTE_LOG(ERR, APP, "pfc %u: port %hu is used already\n", nb_pfc, pconf->tx_port);
+ return -1;
+ }
+ app_used_tx_port_mask |= mask;
+ app_used_port_mask |= mask;
+
+ mask = 1lu << pconf->rx_core;
+ app_used_core_mask |= mask;
+
+ mask = 1lu << pconf->wt_core;
+ app_used_core_mask |= mask;
+
+ mask = 1lu << pconf->tx_core;
+ app_used_core_mask |= mask;
+
+ nb_pfc++;
+
+ return 0;
+}
+
+static int
+app_parse_burst_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[4];
+
+ ret = app_parse_opt_vals(conf_str, ',', 4, vals);
+ if (ret != 4)
+ return ret;
+
+ burst_conf.rx_burst = (uint16_t)vals[0];
+ burst_conf.ring_burst = (uint16_t)vals[1];
+ burst_conf.qos_dequeue = (uint16_t)vals[2];
+ burst_conf.tx_burst = (uint16_t)vals[3];
+
+ return 0;
+}
+
+/*
+ * Parses the argument given in the command line of the application,
+ * calculates mask for used cores and initializes EAL with calculated core mask
+ */
+int
+app_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ int option_index;
+ const char *optname;
+ char *prgname = argv[0];
+ uint32_t i, nb_lcores;
+
+ static struct option lgopts[] = {
+ { "pfc", 1, 0, 0 },
+ { "mst", 1, 0, 0 },
+ { "rsz", 1, 0, 0 },
+ { "bsz", 1, 0, 0 },
+ { "rth", 1, 0, 0 },
+ { "tth", 1, 0, 0 },
+ { "cfg", 1, 0, 0 },
+ { NULL, 0, 0, 0 }
+ };
+
+ /* initialize EAL first */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ return -1;
+
+ argc -= ret;
+ argv += ret;
+
+ /* set en_US locale to print big numbers with ',' */
+ setlocale(LC_NUMERIC, "en_US.utf-8");
+
+ while ((opt = getopt_long(argc, argv, "",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* long options */
+ case 0:
+ optname = lgopts[option_index].name;
+ if (str_is(optname, "pfc")) {
+ ret = app_parse_flow_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid pipe configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "mst")) {
+ app_master_core = (uint32_t)atoi(optarg);
+ break;
+ }
+ if (str_is(optname, "rsz")) {
+ ret = app_parse_ring_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "bsz")) {
+ ret = app_parse_burst_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "rth")) {
+ ret = app_parse_rth_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "tth")) {
+ ret = app_parse_tth_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "cfg")) {
+ cfg_profile = optarg;
+ break;
+ }
+ break;
+
+ default:
+ app_usage(prgname);
+ return -1;
+ }
+ }
+
+ /* check master core index validity */
+ for(i = 0; i <= app_master_core; i++) {
+ if (app_used_core_mask & (1u << app_master_core)) {
+ RTE_LOG(ERR, APP, "Master core index is not configured properly\n");
+ app_usage(prgname);
+ return -1;
+ }
+ }
+ app_used_core_mask |= 1u << app_master_core;
+
+ if ((app_used_core_mask != app_eal_core_mask()) ||
+ (app_master_core != rte_get_master_lcore())) {
+ RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64
+ " instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask());
+ return -1;
+ }
+
+ if (nb_pfc == 0) {
+ RTE_LOG(ERR, APP, "Packet flow not configured!\n");
+ app_usage(prgname);
+ return -1;
+ }
+
+ /* sanity check for cores assignment */
+ nb_lcores = app_cpu_core_count();
+
+ for(i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].rx_core >= nb_lcores) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1,
+ qos_conf[i].rx_core);
+ return -1;
+ }
+ if (qos_conf[i].wt_core >= nb_lcores) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1,
+ qos_conf[i].wt_core);
+ return -1;
+ }
+ uint32_t rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core);
+ uint32_t wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core);
+ if (rx_sock != wt_sock) {
+ RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1);
+ return -1;
+ }
+ app_numa_mask |= 1 << rte_lcore_to_socket_id(qos_conf[i].rx_core);
+ }
+
+ return 0;
+}
+
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
new file mode 100755
index 0000000..85f9c02
--- /dev/null
+++ b/examples/qos_sched/cfg_file.c
@@ -0,0 +1,631 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <rte_string_fns.h>
+#include <rte_sched.h>
+
+#include "cfg_file.h"
+#include "main.h"
+
+
+/** when we resize a file structure, how many extra entries
+ * for new sections do we add in */
+#define CFG_ALLOC_SECTION_BATCH 8
+/** when we resize a section structure, how many extra entries
+ * for new entries do we add in */
+#define CFG_ALLOC_ENTRY_BATCH 16
+
+static unsigned
+_strip(char *str, unsigned len)
+{
+ int newlen = len;
+ if (len == 0)
+ return 0;
+
+ if (isspace(str[len-1])) {
+ /* strip trailing whitespace */
+ while (newlen > 0 && isspace(str[newlen - 1]))
+ str[--newlen] = '\0';
+ }
+
+ if (isspace(str[0])) {
+ /* strip leading whitespace */
+ int i,start = 1;
+ while (isspace(str[start]) && start < newlen)
+ start++
+ ; /* do nothing */
+ newlen -= start;
+ for (i = 0; i < newlen; i++)
+ str[i] = str[i+start];
+ str[i] = '\0';
+ }
+ return newlen;
+}
+
+struct cfg_file *
+cfg_load(const char *filename, int flags)
+{
+ int allocated_sections = CFG_ALLOC_SECTION_BATCH;
+ int allocated_entries = 0;
+ int curr_section = -1;
+ int curr_entry = -1;
+ char buffer[256];
+ int lineno = 0;
+ struct cfg_file *cfg = NULL;
+
+ FILE *f = fopen(filename, "r");
+ if (f == NULL)
+ return NULL;
+
+ cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * allocated_sections);
+ if (cfg == NULL)
+ goto error2;
+
+ memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections);
+
+ while (fgets(buffer, sizeof(buffer), f) != NULL) {
+ char *pos = NULL;
+ size_t len = strnlen(buffer, sizeof(buffer));
+ lineno++;
+ if (len >=sizeof(buffer) - 1 && buffer[len-1] != '\n'){
+ printf("Error line %d - no \\n found on string. "
+ "Check if line too long\n", lineno);
+ goto error1;
+ }
+ if ((pos = memchr(buffer, ';', sizeof(buffer))) != NULL) {
+ *pos = '\0';
+ len = pos - buffer;
+ }
+
+ len = _strip(buffer, len);
+ if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL)
+ continue;
+
+ if (buffer[0] == '[') {
+ /* section heading line */
+ char *end = memchr(buffer, ']', len);
+ if (end == NULL) {
+ printf("Error line %d - no terminating '[' found\n", lineno);
+ goto error1;
+ }
+ *end = '\0';
+ _strip(&buffer[1], end - &buffer[1]);
+
+ /* close off old section and add start new one */
+ if (curr_section >= 0)
+ cfg->sections[curr_section]->num_entries = curr_entry + 1;
+ curr_section++;
+
+ /* resize overall struct if we don't have room for more sections */
+ if (curr_section == allocated_sections) {
+ allocated_sections += CFG_ALLOC_SECTION_BATCH;
+ struct cfg_file *n_cfg = realloc(cfg, sizeof(*cfg) +
+ sizeof(cfg->sections[0]) * allocated_sections);
+ if (n_cfg == NULL) {
+ printf("Error - no more memory\n");
+ goto error1;
+ }
+ cfg = n_cfg;
+ }
+
+ /* allocate space for new section */
+ allocated_entries = CFG_ALLOC_ENTRY_BATCH;
+ curr_entry = -1;
+ cfg->sections[curr_section] = malloc(sizeof(*cfg->sections[0]) +
+ sizeof(cfg->sections[0]->entries[0]) * allocated_entries);
+ if (cfg->sections[curr_section] == NULL) {
+ printf("Error - no more memory\n");
+ goto error1;
+ }
+
+ rte_snprintf(cfg->sections[curr_section]->name,
+ sizeof(cfg->sections[0]->name),
+ "%s", &buffer[1]);
+ }
+ else {
+ /* value line */
+ if (curr_section < 0) {
+ printf("Error line %d - value outside of section\n", lineno);
+ goto error1;
+ }
+
+ struct cfg_section *sect = cfg->sections[curr_section];
+ char *split[2];
+ if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') != 2) {
+ printf("Error at line %d - cannot split string\n", lineno);
+ goto error1;
+ }
+
+ curr_entry++;
+ if (curr_entry == allocated_entries) {
+ allocated_entries += CFG_ALLOC_ENTRY_BATCH;
+ struct cfg_section *n_sect = realloc(sect, sizeof(*sect) +
+ sizeof(sect->entries[0]) * allocated_entries);
+ if (n_sect == NULL) {
+ printf("Error - no more memory\n");
+ goto error1;
+ }
+ sect = cfg->sections[curr_section] = n_sect;
+ }
+
+ sect->entries[curr_entry] = malloc(sizeof(*sect->entries[0]));
+ if (sect->entries[curr_entry] == NULL) {
+ printf("Error - no more memory\n");
+ goto error1;
+ }
+
+ struct cfg_entry *entry = sect->entries[curr_entry];
+ rte_snprintf(entry->name, sizeof(entry->name), "%s", split[0]);
+ rte_snprintf(entry->value, sizeof(entry->value), "%s", split[1]);
+ _strip(entry->name, strnlen(entry->name, sizeof(entry->name)));
+ _strip(entry->value, strnlen(entry->value, sizeof(entry->value)));
+ }
+ }
+ fclose(f);
+ cfg->flags = flags;
+ cfg->sections[curr_section]->num_entries = curr_entry + 1;
+ cfg->num_sections = curr_section + 1;
+ return cfg;
+
+error1:
+ cfg_close(cfg);
+error2:
+ fclose(f);
+ return NULL;
+}
+
+
+int cfg_close(struct cfg_file *cfg)
+{
+ int i, j;
+
+ if (cfg == NULL)
+ return -1;
+
+ for(i = 0; i < cfg->num_sections; i++) {
+ if (cfg->sections[i] != NULL) {
+ if (cfg->sections[i]->num_entries) {
+ for(j = 0; j < cfg->sections[i]->num_entries; j++) {
+ if (cfg->sections[i]->entries[j] != NULL)
+ free(cfg->sections[i]->entries[j]);
+ }
+ }
+ free(cfg->sections[i]);
+ }
+ }
+ free(cfg);
+
+ return 0;
+}
+
+int
+cfg_num_sections(struct cfg_file *cfg, const char *sectionname, size_t length)
+{
+ int i;
+ int num_sections = 0;
+ for (i = 0; i < cfg->num_sections; i++) {
+ if (strncmp(cfg->sections[i]->name, sectionname, length) == 0)
+ num_sections++;
+ }
+ return num_sections;
+}
+
+int
+cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections)
+{
+ int i;
+ for (i = 0; i < cfg->num_sections && i < max_sections; i++) {
+ rte_snprintf(sections[i], CFG_NAME_LEN, "%s", cfg->sections[i]->name);
+ }
+ return i;
+}
+
+static const struct cfg_section *
+_get_section(struct cfg_file *cfg, const char *sectionname)
+{
+ int i;
+ for (i = 0; i < cfg->num_sections; i++) {
+ if (strncmp(cfg->sections[i]->name, sectionname,
+ sizeof(cfg->sections[0]->name)) == 0)
+ return cfg->sections[i];
+ }
+ return NULL;
+}
+
+int
+cfg_has_section(struct cfg_file *cfg, const char *sectionname)
+{
+ return (_get_section(cfg, sectionname) != NULL);
+}
+
+int
+cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname)
+{
+ const struct cfg_section *s = _get_section(cfg, sectionname);
+ if (s == NULL)
+ return -1;
+ return s->num_entries;
+}
+
+
+int
+cfg_section_entries(struct cfg_file *cfg, const char *sectionname,
+ struct cfg_entry *entries, int max_entries)
+{
+ int i;
+ const struct cfg_section *sect = _get_section(cfg, sectionname);
+ if (sect == NULL)
+ return -1;
+ for (i = 0; i < max_entries && i < sect->num_entries; i++)
+ entries[i] = *sect->entries[i];
+ return i;
+}
+
+const char *
+cfg_get_entry(struct cfg_file *cfg, const char *sectionname,
+ const char *entryname)
+{
+ int i;
+ const struct cfg_section *sect = _get_section(cfg, sectionname);
+ if (sect == NULL)
+ return NULL;
+ for (i = 0; i < sect->num_entries; i++)
+ if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) == 0)
+ return sect->entries[i]->value;
+ return NULL;
+}
+
+int
+cfg_has_entry(struct cfg_file *cfg, const char *sectionname,
+ const char *entryname)
+{
+ return (cfg_get_entry(cfg, sectionname, entryname) != NULL);
+}
+
+
+int
+cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port_params)
+{
+ const char *entry;
+ int j;
+
+ if (!cfg || !port_params)
+ return -1;
+
+ entry = cfg_get_entry(cfg, "port", "frame overhead");
+ if (entry)
+ port_params->frame_overhead = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, "port", "number of subports per port");
+ if (entry)
+ port_params->n_subports_per_port = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, "port", "number of pipes per subport");
+ if (entry)
+ port_params->n_pipes_per_subport = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, "port", "queue sizes");
+ if (entry) {
+ char *next;
+
+ for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+ port_params->qsize[j] = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+#ifdef RTE_SCHED_RED
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+ char str[32];
+
+ /* Parse WRED min thresholds */
+ rte_snprintf(str, sizeof(str), "tc %d wred min", j);
+ entry = cfg_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].min_th
+ = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED max thresholds */
+ rte_snprintf(str, sizeof(str), "tc %d wred max", j);
+ entry = cfg_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].max_th
+ = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED inverse mark probabilities */
+ rte_snprintf(str, sizeof(str), "tc %d wred inv prob", j);
+ entry = cfg_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].maxp_inv
+ = (uint8_t)strtol(entry, &next, 10);
+
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED EWMA filter weights */
+ rte_snprintf(str, sizeof(str), "tc %d wred weight", j);
+ entry = cfg_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].wq_log2
+ = (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ }
+#endif /* RTE_SCHED_RED */
+
+ return 0;
+}
+
+int
+cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe_params)
+{
+ int i, j;
+ char *next;
+ const char *entry;
+ int profiles;
+
+ if (!cfg || !pipe_params)
+ return -1;
+
+ profiles = cfg_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1);
+ port_params.n_pipe_profiles = profiles;
+
+ for (j = 0; j < profiles; j++) {
+ char pipe_name[32];
+ rte_snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tb rate");
+ if (entry)
+ pipe_params[j].tb_rate = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tb size");
+ if (entry)
+ pipe_params[j].tb_size = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc period");
+ if (entry)
+ pipe_params[j].tc_period = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 0 rate");
+ if (entry)
+ pipe_params[j].tc_rate[0] = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 1 rate");
+ if (entry)
+ pipe_params[j].tc_rate[1] = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 2 rate");
+ if (entry)
+ pipe_params[j].tc_rate[2] = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 3 rate");
+ if (entry)
+ pipe_params[j].tc_rate[3] = (uint32_t)atoi(entry);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ entry = cfg_get_entry(cfg, pipe_name, "tc 0 oversubscription weight");
+ if (entry)
+ pipe_params[j].tc_ov_weight[0] = (uint8_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 1 oversubscription weight");
+ if (entry)
+ pipe_params[j].tc_ov_weight[1] = (uint8_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 2 oversubscription weight");
+ if (entry)
+ pipe_params[j].tc_ov_weight[2] = (uint8_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 3 oversubscription weight");
+ if (entry)
+ pipe_params[j].tc_ov_weight[3] = (uint8_t)atoi(entry);
+#endif
+
+ entry = cfg_get_entry(cfg, pipe_name, "tc 0 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ entry = cfg_get_entry(cfg, pipe_name, "tc 1 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ entry = cfg_get_entry(cfg, pipe_name, "tc 2 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ entry = cfg_get_entry(cfg, pipe_name, "tc 3 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ }
+ return 0;
+}
+
+int
+cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport_params)
+{
+ const char *entry;
+ int i, j, k;
+
+ if (!cfg || !subport_params)
+ return -1;
+
+ memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile));
+
+ for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
+ char sec_name[CFG_NAME_LEN];
+ rte_snprintf(sec_name, sizeof(sec_name), "subport %d", i);
+
+ if (cfg_has_section(cfg, sec_name)) {
+ entry = cfg_get_entry(cfg, sec_name, "tb rate");
+ if (entry)
+ subport_params[i].tb_rate = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, sec_name, "tb size");
+ if (entry)
+ subport_params[i].tb_size = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, sec_name, "tc period");
+ if (entry)
+ subport_params[i].tc_period = (uint32_t)atoi(entry);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ entry = cfg_get_entry(cfg, sec_name, "tc oversubscription period");
+ if (entry)
+ subport_params[i].tc_ov_period = (uint32_t)atoi(entry);
+#endif
+
+ entry = cfg_get_entry(cfg, sec_name, "tc 0 rate");
+ if (entry)
+ subport_params[i].tc_rate[0] = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, sec_name, "tc 1 rate");
+ if (entry)
+ subport_params[i].tc_rate[1] = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, sec_name, "tc 2 rate");
+ if (entry)
+ subport_params[i].tc_rate[2] = (uint32_t)atoi(entry);
+
+ entry = cfg_get_entry(cfg, sec_name, "tc 3 rate");
+ if (entry)
+ subport_params[i].tc_rate[3] = (uint32_t)atoi(entry);
+
+ int n_entries = cfg_section_num_entries(cfg, sec_name);
+ struct cfg_entry entries[n_entries];
+
+ cfg_section_entries(cfg, sec_name, entries, n_entries);
+
+ for (j = 0; j < n_entries; j++) {
+ if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) {
+ int profile;
+ char *tokens[2] = {NULL, NULL};
+ int n_tokens;
+ int begin, end;
+
+ profile = atoi(entries[j].value);
+ n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")],
+ strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-');
+
+ begin = atoi(tokens[0]);
+ if (n_tokens == 2)
+ end = atoi(tokens[1]);
+ else
+ end = begin;
+
+ if (end >= MAX_SCHED_PIPES || begin > end)
+ return -1;
+
+ for (k = begin; k <= end; k++) {
+ char profile_name[CFG_NAME_LEN];
+
+ rte_snprintf(profile_name, sizeof(profile_name),
+ "pipe profile %d", profile);
+ if (cfg_has_section(cfg, profile_name))
+ app_pipe_to_profile[i][k] = profile;
+ else
+ rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n",
+ entries[j].value);
+
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
new file mode 100755
index 0000000..2e265e6
--- /dev/null
+++ b/examples/qos_sched/cfg_file.h
@@ -0,0 +1,103 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __CFG_FILE_H__
+#define __CFG_FILE_H__
+
+#include <rte_sched.h>
+
+#define CFG_NAME_LEN 32
+#define CFG_VALUE_LEN 64
+
+struct cfg_entry {
+ char name[CFG_NAME_LEN];
+ char value[CFG_VALUE_LEN];
+};
+
+struct cfg_section {
+ char name[CFG_NAME_LEN];
+ int num_entries;
+ struct cfg_entry *entries[0];
+};
+
+struct cfg_file {
+ int flags;
+ int num_sections;
+ struct cfg_section *sections[0];
+};
+
+
+int cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port);
+
+int cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe);
+
+int cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport);
+
+
+/* reads a config file from disk and returns a handle to the config
+ * 'flags' is reserved for future use and must be 0
+ */
+struct cfg_file *cfg_load(const char *filename, int flags);
+
+/* returns the number of sections in the config */
+int cfg_num_sections(struct cfg_file *cfg, const char *sec_name, size_t length);
+
+/* fills the array "sections" with the names of all the sections in the file
+ * (up to a max of max_sections).
+ * NOTE: buffers in the sections array must be at least CFG_NAME_LEN big
+ */
+int cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections);
+
+/* true if the named section exists, false otherwise */
+int cfg_has_section(struct cfg_file *cfg, const char *sectionname);
+
+/* returns the number of entries in a section */
+int cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname);
+
+/* returns the entries in a section as key-value pairs in the "entries" array */
+int cfg_section_entries(struct cfg_file *cfg, const char *sectionname,
+ struct cfg_entry *entries, int max_entries);
+
+/* returns a pointer to the value of the named entry in the named section */
+const char *cfg_get_entry(struct cfg_file *cfg, const char *sectionname,
+ const char *entryname);
+
+/* true if the given entry exists in the given section, false otherwise */
+int cfg_has_entry(struct cfg_file *cfg, const char *sectionname,
+ const char *entryname);
+
+/* cleans up memory allocated by cfg_load() */
+int cfg_close(struct cfg_file *cfg);
+
+#endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
new file mode 100755
index 0000000..1654c73
--- /dev/null
+++ b/examples/qos_sched/init.c
@@ -0,0 +1,385 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdint.h>
+#include <memory.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_debug.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_sched.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+#include "cfg_file.h"
+
+uint32_t app_numa_mask = 0;
+static uint32_t app_inited_port_mask = 0;
+
+int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
+
+#define MAX_NAME_LEN 32
+
+struct ring_conf ring_conf = {
+ .rx_size = APP_RX_DESC_DEFAULT,
+ .ring_size = APP_RING_SIZE,
+ .tx_size = APP_TX_DESC_DEFAULT,
+};
+
+struct burst_conf burst_conf = {
+ .rx_burst = MAX_PKT_RX_BURST,
+ .ring_burst = PKT_ENQUEUE,
+ .qos_dequeue = PKT_DEQUEUE,
+ .tx_burst = MAX_PKT_TX_BURST,
+};
+
+struct ring_thresh rx_thresh = {
+ .pthresh = RX_PTHRESH,
+ .hthresh = RX_HTHRESH,
+ .wthresh = RX_WTHRESH,
+};
+
+struct ring_thresh tx_thresh = {
+ .pthresh = TX_PTHRESH,
+ .hthresh = TX_HTHRESH,
+ .wthresh = TX_WTHRESH,
+};
+
+uint32_t nb_pfc;
+const char *cfg_profile = NULL;
+struct flow_conf qos_conf[MAX_DATA_STREAMS];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+ .txmode = {
+ .mq_mode = ETH_DCB_NONE,
+ },
+};
+
+static int
+app_init_port(uint8_t portid, struct rte_mempool *mp)
+{
+ int ret;
+ struct rte_eth_link link;
+ struct rte_eth_rxconf rx_conf;
+ struct rte_eth_txconf tx_conf;
+
+ /* check if port already initialized (multistream configuration) */
+ if (app_inited_port_mask & (1u << portid))
+ return 0;
+
+ rx_conf.rx_thresh.pthresh = rx_thresh.pthresh;
+ rx_conf.rx_thresh.hthresh = rx_thresh.hthresh;
+ rx_conf.rx_thresh.wthresh = rx_thresh.wthresh;
+ rx_conf.rx_free_thresh = 32;
+ rx_conf.rx_drop_en = 0;
+
+ tx_conf.tx_thresh.pthresh = tx_thresh.pthresh;
+ tx_conf.tx_thresh.hthresh = tx_thresh.hthresh;
+ tx_conf.tx_thresh.wthresh = tx_thresh.wthresh;
+ tx_conf.tx_free_thresh = 0;
+ tx_conf.tx_rs_thresh = 0;
+ tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS;
+
+ /* init port */
+ RTE_LOG(INFO, APP, "Initializing port %hu... ", portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%hu\n",
+ ret, portid);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size,
+ rte_eth_dev_socket_id(portid), &rx_conf, mp);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, port=%hu\n",
+ ret, portid);
+
+ /* init one TX queue */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0,
+ (uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+ "port=%hu queue=%d\n",
+ ret, portid, 0);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_pmd_port_start: err=%d, port=%hu\n",
+ ret, portid);
+
+ printf("done: ");
+
+ /* get link status */
+ rte_eth_link_get(portid, &link);
+ if (link.link_status) {
+ printf(" Link Up - speed %u Mbps - %s\n",
+ (uint32_t) link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ } else {
+ printf(" Link Down\n");
+ }
+ rte_eth_promiscuous_enable(portid);
+
+ /* mark port as initialized */
+ app_inited_port_mask |= 1u << portid;
+
+ return 0;
+}
+
+static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+ {
+ .tb_rate = 1250000000,
+ .tb_size = 1000000,
+
+ .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+ .tc_period = 10,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ .tc_ov_period = 10,
+#endif
+ },
+};
+
+static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = {
+ { /* Profile #0 */
+ .tb_rate = 305175,
+ .tb_size = 1000000,
+
+ .tc_rate = {305175, 305175, 305175, 305175},
+ .tc_period = 40,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ .tc_ov_weight = {1, 1, 1, 1},
+#endif
+
+ .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ },
+};
+
+struct rte_sched_port_params port_params = {
+ .name = "port_0",
+ .socket = 0, /* computed */
+ .rate = 0, /* computed */
+ .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+ .n_subports_per_port = 1,
+ .n_pipes_per_subport = 4096,
+ .qsize = {64, 64, 64, 64},
+ .pipe_profiles = pipe_profiles,
+ .n_pipe_profiles = 1,
+
+#ifdef RTE_SCHED_RED
+ .red_params = {
+ /* Traffic Class 0 Colors Green / Yellow / Red */
+ [0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+ /* Traffic Class 1 - Colors Green / Yellow / Red */
+ [1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+ /* Traffic Class 2 - Colors Green / Yellow / Red */
+ [2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+ /* Traffic Class 3 - Colors Green / Yellow / Red */
+ [3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}
+ }
+#endif /* RTE_SCHED_RED */
+};
+
+static struct rte_sched_port *
+app_init_sched_port(uint32_t portid, uint32_t socketid)
+{
+ static char port_name[32]; /* static as referenced from global port_params*/
+ struct rte_eth_link link;
+ struct rte_sched_port *port = NULL;
+ uint32_t pipe, subport;
+ int err;
+
+ rte_eth_link_get((uint8_t)portid, &link);
+
+ port_params.socket = socketid;
+ port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8;
+ rte_snprintf(port_name, sizeof(port_name), "port_%d", portid);
+ port_params.name = port_name;
+
+ port = rte_sched_port_config(&port_params);
+ if (port == NULL){
+ rte_exit(EXIT_FAILURE, "Unable to config sched port\n");
+ }
+
+ for (subport = 0; subport < port_params.n_subports_per_port; subport ++) {
+ err = rte_sched_subport_config(port, subport, &subport_params[subport]);
+ if (err) {
+ rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n",
+ subport, err);
+ }
+
+ for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) {
+ if (app_pipe_to_profile[subport][pipe] != -1) {
+ err = rte_sched_pipe_config(port, subport, pipe,
+ app_pipe_to_profile[subport][pipe]);
+ if (err) {
+ rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u "
+ "for profile %d, err=%d\n", pipe,
+ app_pipe_to_profile[subport][pipe], err);
+ }
+ }
+ }
+ }
+
+ return port;
+}
+
+static int
+app_load_cfg_profile(const char *profile)
+{
+ if (profile == NULL)
+ return 0;
+
+ struct cfg_file *cfg_file = cfg_load(profile, 0);
+ if (cfg_file == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile);
+
+ cfg_load_port(cfg_file, &port_params);
+ cfg_load_subport(cfg_file, subport_params);
+ cfg_load_pipe(cfg_file, pipe_profiles);
+
+ cfg_close(cfg_file);
+
+ return 0;
+}
+
+int app_init(void)
+{
+ uint32_t i;
+ char ring_name[MAX_NAME_LEN];
+ char pool_name[MAX_NAME_LEN];
+
+ /* init driver(s) */
+ if (rte_pmd_init_all() < 0)
+ rte_exit(EXIT_FAILURE, "Cannot init PMD\n");
+
+ if (rte_eal_pci_probe() < 0)
+ rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");
+
+ if (rte_eth_dev_count() == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n");
+
+ /* load configuration profile */
+ if (app_load_cfg_profile(cfg_profile) != 0)
+ rte_exit(EXIT_FAILURE, "Invalid configuration profile\n");
+
+ /* Initialize each active flow */
+ for(i = 0; i < nb_pfc; i++) {
+ uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core);
+ struct rte_ring *ring;
+
+ rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core);
+ ring = rte_ring_lookup(ring_name);
+ if (ring == NULL)
+ qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
+ socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ else
+ qos_conf[i].rx_ring = ring;
+
+ rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core);
+ ring = rte_ring_lookup(ring_name);
+ if (ring == NULL)
+ qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
+ socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ else
+ qos_conf[i].tx_ring = ring;
+
+
+ /* create the mbuf pools for each RX Port */
+ rte_snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i);
+ qos_conf[i].mbuf_pool = rte_mempool_create(pool_name, NB_MBUF, MBUF_SIZE,
+ burst_conf.rx_burst * 4,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_eth_dev_socket_id(qos_conf[i].rx_port),
+ 0);
+ if (qos_conf[i].mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i);
+
+ //printf("MP = %d\n", rte_mempool_count(qos_conf[i].app_pktmbuf_pool));
+
+ app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool);
+ app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool);
+
+ qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].rx_port, socket);
+ }
+
+ RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n",
+ rte_get_timer_hz());
+
+ RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d SW queue = %u,"
+ "NIC TX = %u\n", ring_conf.rx_size, NB_MBUF, ring_conf.ring_size,
+ ring_conf.tx_size);
+
+ RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n"
+ " Worker read/QoS enqueue = %hu,\n"
+ " QoS dequeue = %hu, Worker write = %hu\n",
+ burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst,
+ burst_conf.qos_dequeue, burst_conf.tx_burst);
+
+ RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu),"
+ "TX (p = %hhu, h = %hhu, w = %hhu)\n",
+ rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh,
+ tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh);
+
+ return 0;
+}
diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c
new file mode 100755
index 0000000..b6cbe35
--- /dev/null
+++ b/examples/qos_sched/main.c
@@ -0,0 +1,246 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <unistd.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+
+#include <rte_sched.h>
+
+#include "main.h"
+
+#define APP_MODE_NONE 0
+#define APP_RX_MODE 1
+#define APP_WT_MODE 2
+#define APP_TX_MODE 4
+
+
+/* main processing loop */
+static int
+app_main_loop(__attribute__((unused))void *dummy)
+{
+ uint32_t lcore_id;
+ uint32_t i, mode;
+ uint32_t rx_idx = 0;
+ uint32_t wt_idx = 0;
+ uint32_t tx_idx = 0;
+ struct thread_conf *rx_confs[MAX_DATA_STREAMS];
+ struct thread_conf *wt_confs[MAX_DATA_STREAMS];
+ struct thread_conf *tx_confs[MAX_DATA_STREAMS];
+
+ memset(rx_confs, 0, sizeof(rx_confs));
+ memset(wt_confs, 0, sizeof(wt_confs));
+ memset(tx_confs, 0, sizeof(tx_confs));
+
+
+ mode = APP_MODE_NONE;
+ lcore_id = rte_lcore_id();
+
+ for (i = 0; i < nb_pfc; i++) {
+ struct flow_conf *flow = &qos_conf[i];
+
+ if (flow->rx_core == lcore_id) {
+ flow->rx_thread.rx_port = flow->rx_port;
+ flow->rx_thread.rx_ring = flow->rx_ring;
+ flow->rx_thread.rx_queue = flow->rx_queue;
+
+ rx_confs[rx_idx++] = &flow->rx_thread;
+
+ mode |= APP_RX_MODE;
+ }
+ if (flow->tx_core == lcore_id) {
+ flow->tx_thread.tx_port = flow->tx_port;
+ flow->tx_thread.tx_ring = flow->tx_ring;
+ flow->tx_thread.tx_queue = flow->tx_queue;
+
+ tx_confs[tx_idx++] = &flow->tx_thread;
+
+ mode |= APP_TX_MODE;
+ }
+ if (flow->wt_core == lcore_id) {
+ flow->wt_thread.rx_ring = flow->rx_ring;
+ flow->wt_thread.tx_ring = flow->tx_ring;
+ flow->wt_thread.tx_port = flow->tx_port;
+ flow->wt_thread.sched_port = flow->sched_port;
+
+ wt_confs[wt_idx++] = &flow->wt_thread;
+
+ mode |= APP_WT_MODE;
+ }
+ }
+
+ if (mode == APP_MODE_NONE) {
+ RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id);
+ return -1;
+ }
+
+ if (mode == (APP_RX_MODE | APP_WT_MODE)) {
+ RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n",
+ lcore_id);
+ return -1;
+ }
+
+ RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id);
+ /* initialize mbuf memory */
+ if (mode == APP_RX_MODE) {
+ for (i = 0; i < rx_idx; i++) {
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u reading port %hu\n",
+ i, lcore_id, rx_confs[i]->rx_port);
+ }
+
+ app_rx_thread(rx_confs);
+ }
+ else if (mode == (APP_TX_MODE | APP_WT_MODE)) {
+ for (i = 0; i < wt_idx; i++) {
+ wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *)
+ * burst_conf.tx_burst, CACHE_LINE_SIZE);
+
+ if (wt_confs[i]->m_table == NULL)
+ rte_panic("flow %u unable to allocate memory buffer\n", i);
+
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write port %hu\n",
+ i, lcore_id, wt_confs[i]->tx_port);
+ }
+
+ app_mixed_thread(wt_confs);
+ }
+ else if (mode == APP_TX_MODE) {
+ for (i = 0; i < tx_idx; i++) {
+ tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *)
+ * burst_conf.tx_burst, CACHE_LINE_SIZE);
+
+ if (tx_confs[i]->m_table == NULL)
+ rte_panic("flow %u unable to allocate memory buffer\n", i);
+
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u writing port %hu\n",
+ i, lcore_id, tx_confs[i]->tx_port);
+ }
+
+ app_tx_thread(tx_confs);
+ }
+ else if (mode == APP_WT_MODE){
+ for (i = 0; i < wt_idx; i++) {
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id);
+ }
+
+ app_worker_thread(wt_confs);
+ }
+
+ return 0;
+}
+
+static void
+app_stat(void)
+{
+ uint32_t i;
+ struct rte_eth_stats stats;
+ static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS];
+ static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS];
+
+ /* print statistics */
+ for(i = 0; i < nb_pfc; i++) {
+ struct flow_conf *flow = &qos_conf[i];
+
+ rte_eth_stats_get(flow->rx_port, &stats);
+ printf("\nRX port %hu: rx: %"PRIu64 " err: %"PRIu64 " no_mbuf: %"PRIu64 "\n",
+ flow->rx_port,
+ stats.ipackets - rx_stats[i].ipackets,
+ stats.ierrors - rx_stats[i].ierrors,
+ stats.rx_nombuf - rx_stats[i].rx_nombuf);
+ memcpy(&rx_stats[i], &stats, sizeof(stats));
+
+ rte_eth_stats_get(flow->tx_port, &stats);
+ printf("TX port %hu: tx: %" PRIu64 " err: %" PRIu64 "\n",
+ flow->tx_port,
+ stats.opackets - tx_stats[i].opackets,
+ stats.oerrors - tx_stats[i].oerrors);
+ memcpy(&tx_stats[i], &stats, sizeof(stats));
+
+ //printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool));
+
+#if APP_COLLECT_STAT
+ printf("-------+------------+------------+\n");
+ printf(" | received | dropped |\n");
+ printf("-------+------------+------------+\n");
+ printf(" RX | %10" PRIu64 " | %10" PRIu64 " |\n",
+ flow->rx_thread.stat.nb_rx,
+ flow->rx_thread.stat.nb_drop);
+ printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " | pps: %"PRIu64 " \n",
+ flow->wt_thread.stat.nb_rx,
+ flow->wt_thread.stat.nb_drop,
+ flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop);
+ printf("-------+------------+------------+\n");
+
+ memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat));
+ memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat));
+#endif
+ }
+}
+
+
+
+int
+MAIN(int argc, char **argv)
+{
+ int ret;
+
+ ret = app_parse_args(argc, argv);
+ if (ret < 0)
+ return -1;
+
+ ret = app_init();
+ if (ret < 0)
+ return -1;
+
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(app_main_loop, NULL, SKIP_MASTER);
+
+ /* print statistics every second */
+ while(1) {
+ sleep(1);
+ app_stat();
+ }
+}
+
+
+
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
new file mode 100755
index 0000000..243064c
--- /dev/null
+++ b/examples/qos_sched/main.h
@@ -0,0 +1,186 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_sched.h>
+
+#ifdef RTE_EXEC_ENV_BAREMETAL
+#error "Baremetal is not supported"
+#else
+#define MAIN main
+#endif
+
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define APP_RX_DESC_DEFAULT 128
+#define APP_TX_DESC_DEFAULT 256
+
+#define MBUF_SIZE (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define APP_RING_SIZE (8*1024)
+#define NB_MBUF (64*1024*32)
+
+#define MAX_PKT_RX_BURST 64
+#define PKT_ENQUEUE 64
+#define PKT_DEQUEUE 32
+#define MAX_PKT_TX_BURST 64
+
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
+
+#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */
+
+#define BURST_TX_DRAIN_US 100
+
+#define MAX_DATA_STREAMS (RTE_MAX_LCORE/2)
+#define MAX_SCHED_SUBPORTS 8
+#define MAX_SCHED_PIPES 4096
+
+#ifndef APP_COLLECT_STAT
+#define APP_COLLECT_STAT 1
+#endif
+
+#if APP_COLLECT_STAT
+#define APP_STATS_ADD(stat,val) (stat) += (val)
+#else
+#define APP_STATS_ADD(stat,val) do {(void) (val);} while (0)
+#endif
+
+struct thread_stat
+{
+ uint64_t nb_rx;
+ uint64_t nb_drop;
+};
+
+
+struct thread_conf
+{
+ uint32_t counter;
+ uint32_t n_mbufs;
+ struct rte_mbuf **m_table;
+
+ uint8_t rx_port;
+ uint8_t tx_port;
+ uint16_t rx_queue;
+ uint16_t tx_queue;
+ struct rte_ring *rx_ring;
+ struct rte_ring *tx_ring;
+ struct rte_sched_port *sched_port;
+
+#if APP_COLLECT_STAT
+ struct thread_stat stat;
+#endif
+} __rte_cache_aligned;
+
+
+struct flow_conf
+{
+ uint32_t rx_core;
+ uint32_t wt_core;
+ uint32_t tx_core;
+ uint8_t rx_port;
+ uint8_t tx_port;
+ uint16_t rx_queue;
+ uint16_t tx_queue;
+ struct rte_ring *rx_ring;
+ struct rte_ring *tx_ring;
+ struct rte_sched_port *sched_port;
+ struct rte_mempool *mbuf_pool;
+
+ struct thread_conf rx_thread;
+ struct thread_conf wt_thread;
+ struct thread_conf tx_thread;
+};
+
+
+struct ring_conf
+{
+ uint32_t rx_size;
+ uint32_t ring_size;
+ uint32_t tx_size;
+};
+
+struct burst_conf
+{
+ uint16_t rx_burst;
+ uint16_t ring_burst;
+ uint16_t qos_dequeue;
+ uint16_t tx_burst;
+};
+
+struct ring_thresh
+{
+ uint8_t pthresh; /**< Ring prefetch threshold. */
+ uint8_t hthresh; /**< Ring host threshold. */
+ uint8_t wthresh; /**< Ring writeback threshold. */
+};
+
+extern uint32_t nb_pfc;
+extern const char *cfg_profile;
+extern struct flow_conf qos_conf[];
+extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
+
+extern struct ring_conf ring_conf;
+extern struct burst_conf burst_conf;
+extern struct ring_thresh rx_thresh;
+extern struct ring_thresh tx_thresh;
+
+extern struct rte_sched_port_params port_params;
+
+int MAIN(int argc, char **argv);
+int app_parse_args(int argc, char **argv);
+int app_init(void);
+
+void app_rx_thread(struct thread_conf **qconf);
+void app_tx_thread(struct thread_conf **qconf);
+void app_worker_thread(struct thread_conf **qconf);
+void app_mixed_thread(struct thread_conf **qconf);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MAIN_H_ */
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
new file mode 100644
index 0000000..5caa996
--- /dev/null
+++ b/examples/qos_sched/profile.cfg
@@ -0,0 +1,109 @@
+; BSD LICENSE
+;
+; Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+; This file enables the following hierarchical scheduler configuration for each
+; 10GbE output port:
+; * Single subport (subport 0):
+; - Subport rate set to 100% of port rate
+; - Each of the 4 traffic classes has rate set to 100% of port rate
+; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+; - Pipe rate set to 1/4K of port rate
+; - Each of the 4 traffic classes has rate set to 100% of pipe rate
+; - Within each traffic class, the byte-level WRR weights for the 4 queues
+; are set to 1:1:1:1
+;
+; For more details, please refer to chapter "Quality of Service (QoS) Framework"
+; of Intel Data Plane Development Kit (Intel DPDK) Programmer's Guide.
+
+; Port configuration
+[port]
+frame overhead = 24
+number of subports per port = 1
+number of pipes per subport = 4096
+queue sizes = 64 64 64 64
+
+; Subport configuration
+[subport 0]
+tb rate = 1250000000 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 1250000000 ; Bytes per second
+tc 1 rate = 1250000000 ; Bytes per second
+tc 2 rate = 1250000000 ; Bytes per second
+tc 3 rate = 1250000000 ; Bytes per second
+tc period = 10 ; Milliseconds
+tc oversubscription period = 10; Milliseconds
+
+pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0
+
+; Pipe configuration
+[pipe profile 0]
+tb rate = 305175 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 305175 ; Bytes per second
+tc 1 rate = 305175 ; Bytes per second
+tc 2 rate = 305175 ; Bytes per second
+tc 3 rate = 305175 ; Bytes per second
+tc period = 40 ; Milliseconds
+
+tc 0 oversubscription weight = 1
+tc 1 oversubscription weight = 1
+tc 2 oversubscription weight = 1
+tc 3 oversubscription weight = 1
+
+tc 0 wrr weights = 1 1 1 1
+tc 1 wrr weights = 1 1 1 1
+tc 2 wrr weights = 1 1 1 1
+tc 3 wrr weights = 1 1 1 1
+
+; RED params per traffic class and color (Green / Yellow / Red)
+[red]
+tc 0 wred min = 48 40 32
+tc 0 wred max = 64 64 64
+tc 0 wred inv prob = 10 10 10
+tc 0 wred weight = 9 9 9
+
+tc 1 wred min = 48 40 32
+tc 1 wred max = 64 64 64
+tc 1 wred inv prob = 10 10 10
+tc 1 wred weight = 9 9 9
+
+tc 2 wred min = 48 40 32
+tc 2 wred max = 64 64 64
+tc 2 wred inv prob = 10 10 10
+tc 2 wred weight = 9 9 9
+
+tc 3 wred min = 48 40 32
+tc 3 wred max = 64 64 64
+tc 3 wred inv prob = 10 10 10
+tc 3 wred weight = 9 9 9
diff --git a/lib/Makefile b/lib/Makefile
index 122ba42..74162f2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -48,6 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
+DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
DIRS-$(CONFIG_RTE_LIBRTE_PMAC) += librte_pmac
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index b0c735f..bf4a3e2 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -74,6 +74,7 @@ extern struct rte_logs rte_logs;
#define RTE_LOGTYPE_PMAC 0x00000200 /**< Log related to PMAC. */
#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */
#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */
+#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */
/* these log types can be used in an application */
#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 27988c3..5d610cb 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -158,6 +158,7 @@ struct rte_pktmbuf {
uint16_t hash;
uint16_t id;
} fdir; /**< Filter identifier if FDIR enabled */
+ uint32_t sched; /**< Hierarchical scheduler */
} hash; /**< hash information */
};
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
new file mode 100644
index 0000000..5050db0
--- /dev/null
+++ b/lib/librte_sched/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_sched.a
+
+CFLAGS += -O3
+CFLAGS += -g
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c
new file mode 100644
index 0000000..c05e2a7
--- /dev/null
+++ b/lib/librte_sched/rte_approx.c
@@ -0,0 +1,197 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdlib.h>
+
+#include "rte_approx.h"
+
+/*
+ * Based on paper "Approximating Rational Numbers by Fractions" by Michal
+ * Forisek forisek@dcs.fmph.uniba.sk
+ *
+ * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
+ * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
+ * q is minimal.
+ *
+ * http://people.ksp.sk/~misof/publications/2007approx.pdf
+ */
+
+/* fraction comparison: compare (a/b) and (c/d) */
+static inline uint32_t
+less(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+ return (a*d < b*c);
+}
+
+static inline uint32_t
+less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+ return (a*d <= b*c);
+}
+
+/* check whether a/b is a valid approximation */
+static inline uint32_t
+matches(uint32_t a, uint32_t b,
+ uint32_t alpha_num, uint32_t d_num, uint32_t denum)
+{
+ if (less_or_equal(a, b, alpha_num - d_num, denum))
+ return 0;
+
+ if (less(a ,b, alpha_num + d_num, denum))
+ return 1;
+
+ return 0;
+}
+
+static inline void
+find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b,
+ uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+ uint32_t k_num = denum * p_b - (alpha_num + d_num) * q_b;
+ uint32_t k_denum = (alpha_num + d_num) * q_a - denum * p_a;
+ uint32_t k = (k_num / k_denum) + 1;
+
+ *p = p_b + k * p_a;
+ *q = q_b + k * q_a;
+}
+
+static inline void
+find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b,
+ uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+ uint32_t k_num = - denum * p_b + (alpha_num - d_num) * q_b;
+ uint32_t k_denum = - (alpha_num - d_num) * q_a + denum * p_a;
+ uint32_t k = (k_num / k_denum) + 1;
+
+ *p = p_b + k * p_a;
+ *q = q_b + k * q_a;
+}
+
+static int
+find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+ uint32_t p_a, q_a, p_b, q_b;
+
+ /* check assumptions on the inputs */
+ if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) {
+ return -1;
+ }
+
+ /* set initial bounds for the search */
+ p_a = 0;
+ q_a = 1;
+ p_b = 1;
+ q_b = 1;
+
+ while (1) {
+ uint32_t new_p_a, new_q_a, new_p_b, new_q_b;
+ uint32_t x_num, x_denum, x;
+ int aa, bb;
+
+ /* compute the number of steps to the left */
+ x_num = denum * p_b - alpha_num * q_b;
+ x_denum = - denum * p_a + alpha_num * q_a;
+ x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+ /* check whether we have a valid approximation */
+ aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+ bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
+ if (aa || bb) {
+ find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
+ return 0;
+ }
+
+ /* update the interval */
+ new_p_a = p_b + (x - 1) * p_a ;
+ new_q_a = q_b + (x - 1) * q_a;
+ new_p_b = p_b + x * p_a ;
+ new_q_b = q_b + x * q_a;
+
+ p_a = new_p_a ;
+ q_a = new_q_a;
+ p_b = new_p_b ;
+ q_b = new_q_b;
+
+ /* compute the number of steps to the right */
+ x_num = alpha_num * q_b - denum * p_b;
+ x_denum = - alpha_num * q_a + denum * p_a;
+ x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+ /* check whether we have a valid approximation */
+ aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+ bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
+ if (aa || bb) {
+ find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
+ return 0;
+ }
+
+ /* update the interval */
+ new_p_a = p_b + (x - 1) * p_a;
+ new_q_a = q_b + (x - 1) * q_a;
+ new_p_b = p_b + x * p_a;
+ new_q_b = q_b + x * q_a;
+
+ p_a = new_p_a;
+ q_a = new_q_a;
+ p_b = new_p_b;
+ q_b = new_q_b;
+ }
+}
+
+int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q)
+{
+ uint32_t alpha_num, d_num, denum;
+
+ /* Check input arguments */
+ if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) {
+ return -1;
+ }
+
+ if ((p == NULL) || (q == NULL)) {
+ return -2;
+ }
+
+ /* Compute alpha_num, d_num and denum */
+ denum = 1;
+ while (d < 1) {
+ alpha *= 10;
+ d *= 10;
+ denum *= 10;
+ }
+ alpha_num = (uint32_t) alpha;
+ d_num = (uint32_t) d;
+
+ /* Perform approximation */
+ return find_best_rational_approximation(alpha_num, d_num, denum, p, q);
+}
diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h
new file mode 100644
index 0000000..d755afa
--- /dev/null
+++ b/lib/librte_sched/rte_approx.h
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_APPROX_H__
+#define __INCLUDE_RTE_APPROX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Rational Approximation
+ *
+ * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
+ * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
+ * q is minimal.
+ *
+ ***/
+
+#include <stdint.h>
+
+/**
+ * Find best rational approximation
+ *
+ * @param alpha
+ * Rational number to approximate
+ * @param d
+ * Precision for the rational approximation
+ * @param p
+ * Pointer to pre-allocated space where the numerator of the rational
+ * approximation will be stored when operation is successful
+ * @param q
+ * Pointer to pre-allocated space where the denominator of the rational
+ * approximation will be stored when operation is successful
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_APPROX_H__ */
diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h
new file mode 100644
index 0000000..c52db32
--- /dev/null
+++ b/lib/librte_sched/rte_bitmap.h
@@ -0,0 +1,505 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_BITMAP_H__
+#define __INCLUDE_RTE_BITMAP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Bitmap
+ *
+ * The bitmap component provides a mechanism to manage large arrays of bits
+ * through bit get/set/clear and bit array scan operations.
+ *
+ * The bitmap scan operation is optimized for 64-bit CPUs using 64-byte cache
+ * lines. The bitmap is hierarchically organized using two arrays (array1 and
+ * array2), with each bit in array1 being associated with a full cache line
+ * (512 bits) of bitmap bits, which are stored in array2: the bit in array1 is
+ * set only when there is at least one bit set within its associated array2
+ * bits, otherwise the bit in array1 is cleared. The read and write operations
+ * for array1 and array2 are always done in slabs of 64 bits.
+ *
+ * This bitmap is not thread safe. For lock free operation on a specific bitmap
+ * instance, a single writer thread performing bit set/clear operations is
+ * allowed, only the writer thread can do bitmap scan operations, while there
+ * can be several reader threads performing bit get operations in parallel with
+ * the writer thread. When the use of locking primitives is acceptable, the
+ * serialization of the bit set/clear and bitmap scan operations needs to be
+ * enforced by the caller, while the bit get operation does not require locking
+ * the bitmap.
+ *
+ ***/
+
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+#ifndef RTE_BITMAP_OPTIMIZATIONS
+#define RTE_BITMAP_OPTIMIZATIONS 1
+#endif
+#if RTE_BITMAP_OPTIMIZATIONS
+#include <tmmintrin.h>
+#endif
+
+/** Number of elements in array1. Each element in array1 is a 64-bit slab. */
+#ifndef RTE_BITMAP_ARRAY1_SIZE
+#define RTE_BITMAP_ARRAY1_SIZE 16
+#endif
+
+/* Slab */
+#define RTE_BITMAP_SLAB_BIT_SIZE 64
+#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6
+#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1)
+
+/* Cache line (CL) */
+#define RTE_BITMAP_CL_BIT_SIZE (CACHE_LINE_SIZE * 8)
+#define RTE_BITMAP_CL_BIT_SIZE_LOG2 9
+#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1)
+
+#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
+#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 3
+#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1)
+
+/** Bitmap data structure */
+struct rte_bitmap {
+ uint64_t array1[RTE_BITMAP_ARRAY1_SIZE]; /**< Bitmap array1 */
+ uint64_t *array2; /**< Bitmap array2 */
+ uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */
+ uint32_t array2_size; /**< Number of 64-bit slabs in array2 */
+
+ /* Context for the "scan next" operation */
+ uint32_t index1; /**< Bitmap scan: Index of current array1 slab */
+ uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
+ uint32_t index2; /**< Bitmap scan: Index of current array2 slab */
+ uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */
+} __rte_cache_aligned;
+
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+ bmp->index1 = (bmp->index1 + 1) & (RTE_BITMAP_ARRAY1_SIZE - 1);
+}
+
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+ return ((~1lu) << bmp->offset1);
+}
+
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+ bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+}
+
+#if RTE_BITMAP_OPTIMIZATIONS
+
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+ if (likely(slab == 0)) {
+ return 0;
+ }
+
+ *pos = __builtin_ctzll(slab);
+ return 1;
+}
+
+#else
+
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+ uint64_t mask;
+ uint32_t i;
+
+ if (likely(slab == 0)) {
+ return 0;
+ }
+
+ for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) {
+ if (unlikely(slab & mask)) {
+ *pos = i;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#endif
+
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+ bmp->index1 = RTE_BITMAP_ARRAY1_SIZE - 1;
+ bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
+ __rte_bitmap_index2_set(bmp);
+ bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+
+ bmp->go2 = 0;
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param array2
+ * Base address of pre-allocated array2
+ * @param n_bits
+ * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline int
+rte_bitmap_init(struct rte_bitmap *bmp, uint8_t *array2, uint32_t n_bits)
+{
+ uint32_t array1_size, array2_size;
+
+ /* Check input arguments */
+ if ((bmp == NULL) ||
+ (array2 == NULL) || (((uintptr_t) array2) & CACHE_LINE_MASK) ||
+ (n_bits == 0) || (n_bits & RTE_BITMAP_CL_BIT_MASK)){
+ return -1;
+ }
+
+ array2_size = n_bits / RTE_BITMAP_SLAB_BIT_SIZE;
+ array1_size = ((n_bits / RTE_BITMAP_CL_BIT_SIZE) + (RTE_BITMAP_SLAB_BIT_SIZE - 1)) / RTE_BITMAP_SLAB_BIT_SIZE;
+ if (array1_size > RTE_BITMAP_ARRAY1_SIZE){
+ return -1;
+ }
+
+ /* Setup bitmap */
+ memset(bmp, 0, sizeof(struct rte_bitmap));
+ bmp->array2 = (uint64_t *) array2;
+ bmp->array1_size = array1_size;
+ bmp->array2_size = array2_size;
+ __rte_bitmap_scan_init(bmp);
+
+ return 0;
+}
+
+/**
+ * Bitmap free
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+ /* Check input arguments */
+ if (bmp == NULL) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+ memset(bmp->array1, 0, sizeof(bmp->array1));
+ memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t));
+ __rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab2;
+ uint32_t index2;
+
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ slab2 = bmp->array2 + index2;
+ rte_prefetch0((void *) slab2);
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ * @return
+ * 0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab2;
+ uint32_t index2, offset2;
+
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ return ((*slab2) & (1lu << offset2));
+}
+
+/**
+ * Bitmap bit set
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1, offset2;
+
+ /* Set bit in array2 slab and set bit in array1 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ slab1 = bmp->array1 + index1;
+
+ *slab2 |= 1lu << offset2;
+ *slab1 |= 1lu << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position identifying the array2 slab
+ * @param slab
+ * Value to be assigned to the 64-bit slab in array2
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1;
+
+ /* Set bits in array2 slab and set bit in array1 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ slab1 = bmp->array1 + index1;
+
+ *slab2 |= slab;
+ *slab1 |= 1lu << offset1;
+}
+
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+ uint64_t v1, v2, v3, v4;
+
+ v1 = slab2[0] | slab2[1];
+ v2 = slab2[2] | slab2[3];
+ v3 = slab2[4] | slab2[5];
+ v4 = slab2[6] | slab2[7];
+ v1 |= v2;
+ v3 |= v4;
+
+ return (v1 | v3);
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1, offset2;
+
+ /* Clear bit in array2 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+
+ /* Return if array2 slab is not all-zeros */
+ *slab2 &= ~(1lu << offset2);
+ if (*slab2){
+ return;
+ }
+
+ /* Check the entire cache line of array2 for all-zeros */
+ index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+ slab2 = bmp->array2 + index2;
+ if (__rte_bitmap_line_not_empty(slab2)) {
+ return;
+ }
+
+ /* The array2 cache line is all-zeros, so clear bit in array1 slab */
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab1 = bmp->array1 + index1;
+ *slab1 &= ~(1lu << offset1);
+
+ return;
+}
+
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+ uint64_t value1;
+ uint32_t i;
+
+ /* Check current array1 slab */
+ value1 = bmp->array1[bmp->index1];
+ value1 &= __rte_bitmap_mask1_get(bmp);
+
+ if (rte_bsf64(value1, &bmp->offset1)) {
+ return 1;
+ }
+
+ __rte_bitmap_index1_inc(bmp);
+ bmp->offset1 = 0;
+
+ /* Look for another array1 slab */
+ for (i = 0; i < RTE_BITMAP_ARRAY1_SIZE; i ++, __rte_bitmap_index1_inc(bmp)) {
+ value1 = bmp->array1[bmp->index1];
+
+ if (rte_bsf64(value1, &bmp->offset1)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+ __rte_bitmap_index2_set(bmp);
+ bmp->go2 = 1;
+ rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8));
+}
+
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+ uint64_t *slab2;
+
+ slab2 = bmp->array2 + bmp->index2;
+ for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) {
+ if (*slab2) {
+ *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ *slab = *slab2;
+
+ bmp->index2 ++;
+ slab2 ++;
+ bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * When function call returns 1, pos contains the position of the next set
+ * bit, otherwise not modified
+ * @param slab
+ * When function call returns 1, slab contains the value of the entire 64-bit
+ * slab where the bit indicated by pos is located. Slabs are always 64-bit
+ * aligned, so the position of the first bit of the slab (this bit is not
+ * necessarily set) is pos / 64. Once a slab has been returned by the bitmap
+ * scan operation, the internal pointers of the bitmap are updated to point
+ * after this slab, so the same slab will not be returned again if it
+ * contains more than one bit which is set. When function call returns 0,
+ * slab is not modified.
+ * @return
+ * 0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+ /* Return data from current array2 line if available */
+ if (__rte_bitmap_scan_read(bmp, pos, slab)) {
+ return 1;
+ }
+
+ /* Look for non-empty array2 line */
+ if (__rte_bitmap_scan_search(bmp)) {
+ __rte_bitmap_scan_read_init(bmp);
+ __rte_bitmap_scan_read(bmp, pos, slab);
+ return 1;
+ }
+
+ /* Empty bitmap */
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c
new file mode 100644
index 0000000..0eaf5a0
--- /dev/null
+++ b/lib/librte_sched/rte_red.c
@@ -0,0 +1,160 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <math.h>
+#include "rte_red.h"
+#include <rte_random.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+static int rte_red_init_done = 0; /**< Flag to indicate that global initialisation is done */
+uint32_t rte_red_rand_val = 0; /**< Random value cache */
+uint32_t rte_red_rand_seed = 0; /**< Seed for random number generation */
+
+/**
+ * table[i] = log2(1-Wq) * Scale * -1
+ * Wq = 1/(2^i)
+ */
+uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
+
+/**
+ * table[i] = 2^(i/16) * Scale
+ */
+uint16_t rte_red_pow2_frac_inv[16];
+
+/**
+ * @brief Initialize tables used to compute average
+ * queue size when queue is empty.
+ */
+static void
+__rte_red_init_tables(void)
+{
+ uint32_t i = 0;
+ double scale = 0.0;
+ double table_size = 0.0;
+
+ scale = (double)(1 << RTE_RED_SCALING);
+ table_size = (double)(DIM(rte_red_pow2_frac_inv));
+
+ for (i = 0; i < DIM(rte_red_pow2_frac_inv); i++) {
+ double m = (double)i;
+
+ rte_red_pow2_frac_inv[i] = (uint16_t) round(scale / pow(2, m / table_size));
+ }
+
+ scale = 1024.0;
+
+ RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == DIM(rte_red_log2_1_minus_Wq));
+
+ for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) {
+ double n = (double)i;
+ double Wq = pow(2, -n);
+ uint32_t index = i - RTE_RED_WQ_LOG2_MIN;
+
+ rte_red_log2_1_minus_Wq[index] = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq));
+ /**
+ * Table entry of zero, corresponds to a Wq of zero
+ * which is not valid (avg would remain constant no
+ * matter how long the queue is empty). So we have
+ * to check for zero and round up to one.
+ */
+ if (rte_red_log2_1_minus_Wq[index] == 0) {
+ rte_red_log2_1_minus_Wq[index] = 1;
+ }
+ }
+}
+
+int
+rte_red_rt_data_init(struct rte_red *red)
+{
+ if (red == NULL)
+ return -1;
+
+ red->avg = 0;
+ red->count = 0;
+ red->q_time = 0;
+ return 0;
+}
+
+int
+rte_red_config_init(struct rte_red_config *red_cfg,
+ const uint16_t wq_log2,
+ const uint16_t min_th,
+ const uint16_t max_th,
+ const uint16_t maxp_inv)
+{
+ if (red_cfg == NULL) {
+ return -1;
+ }
+ if (max_th > RTE_RED_MAX_TH_MAX) {
+ return -2;
+ }
+ if (min_th >= max_th) {
+ return -3;
+ }
+ if (wq_log2 > RTE_RED_WQ_LOG2_MAX) {
+ return -4;
+ }
+ if (wq_log2 < RTE_RED_WQ_LOG2_MIN) {
+ return -5;
+ }
+ if (maxp_inv < RTE_RED_MAXP_INV_MIN) {
+ return -6;
+ }
+ if (maxp_inv > RTE_RED_MAXP_INV_MAX) {
+ return -7;
+ }
+
+ /**
+ * Initialize the RED module if not already done
+ */
+ if (!rte_red_init_done) {
+ rte_red_rand_seed = rte_rand();
+ rte_red_rand_val = rte_fast_rand();
+ __rte_red_init_tables();
+ rte_red_init_done = 1;
+ }
+
+ red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING);
+ red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING);
+ red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING;
+ red_cfg->maxp_inv = maxp_inv;
+ red_cfg->wq_log2 = wq_log2;
+
+ return 0;
+}
diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h
new file mode 100644
index 0000000..debe556
--- /dev/null
+++ b/lib/librte_sched/rte_red.h
@@ -0,0 +1,454 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __RTE_RED_H_INCLUDED__
+#define __RTE_RED_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Random Early Detection (RED)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+#include <limits.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_branch_prediction.h>
+
+#define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */
+#define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */
+#define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */
+#define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */
+#define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */
+#define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */
+#define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */
+#define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */
+#define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT)
+#define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
+
+#ifdef RTE_RED_DEBUG
+
+#define RTE_RED_ASSERT(exp) \
+if (!(exp)) { \
+ rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
+}
+
+#else
+
+#define RTE_RED_ASSERT(exp) do { } while(0)
+
+#endif /* RTE_RED_DEBUG */
+
+/**
+ * Externs
+ *
+ */
+extern uint32_t rte_red_rand_val;
+extern uint32_t rte_red_rand_seed;
+extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
+extern uint16_t rte_red_pow2_frac_inv[16];
+
+/**
+ * RED configuration parameters passed by user
+ *
+ */
+struct rte_red_params {
+ uint16_t min_th; /**< Minimum threshold for queue (max_th) */
+ uint16_t max_th; /**< Maximum threshold for queue (max_th) */
+ uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
+ uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
+};
+
+/**
+ * RED configuration parameters
+ */
+struct rte_red_config {
+ uint32_t min_th; /**< min_th scaled in fixed-point format */
+ uint32_t max_th; /**< max_th scaled in fixed-point format */
+ uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
+ uint8_t maxp_inv; /**< maxp_inv */
+ uint8_t wq_log2; /**< wq_log2 */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_red {
+ uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */
+ uint32_t count; /**< Number of packets since last marked packet (count) */
+ uint64_t q_time; /**< Start of the queue idle time (q_time) */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param [in,out] data pointer to RED runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_red_rt_data_init(struct rte_red *red);
+
+/**
+ * @brief Configures a single RED configuration parameter structure.
+ *
+ * @param [in,out] config pointer to a RED configuration parameter structure
+ * @param [in] wq_log2 log2 of the filter weight, valid range is:
+ * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
+ * @param [in] min_th queue minimum threshold in number of packets
+ * @param [in] max_th queue maximum threshold in number of packets
+ * @param [in] maxp_inv inverse maximum mark probability
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_red_config_init(struct rte_red_config *red_cfg,
+ const uint16_t wq_log2,
+ const uint16_t min_th,
+ const uint16_t max_th,
+ const uint16_t maxp_inv);
+
+/**
+ * @brief Generate random number for RED
+ *
+ * Implemenetation based on:
+ * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
+ *
+ * 10 bit shift has been found through empirical tests (was 16).
+ *
+ * @return Random number between 0 and (2^22 - 1)
+ */
+static inline uint32_t
+rte_fast_rand(void)
+{
+ rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
+ return (rte_red_rand_seed >> 10);
+}
+
+/**
+ * @brief calculate factor to scale average queue size when queue
+ * becomes empty
+ *
+ * @param [in] wq_log2, where EWMA filter weight wq = 1/(2 ^ wq_log2)
+ * @param [in] m exponent in the computed value (1 - wq) ^ m
+ *
+ * @return computed value
+ * @retval ((1 - wq) ^ m) scaled in fixed-point format
+ */
+static inline uint16_t
+__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
+{
+ uint32_t n = 0;
+ uint32_t f = 0;
+
+ /**
+ * Basic math tells us that:
+ * a^b = 2^(b * log2(a) )
+ *
+ * in our case:
+ * a = (1-Wq)
+ * b = m
+ * Wq = 1/ (2^log2n)
+ *
+ * So we are computing this equation:
+ * factor = 2 ^ ( m * log2(1-Wq))
+ *
+ * First we are computing:
+ * n = m * log2(1-Wq)
+ *
+ * To avoid dealing with signed numbers log2 values are positive
+ * but they should be negative because (1-Wq) is always < 1.
+ * Contents of log2 table values are also scaled for precision.
+ */
+
+ n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
+
+ /**
+ * The tricky part is computing 2^n, for this I split n into
+ * integer part and fraction part.
+ * f - is fraction part of n
+ * n - is integer part of original n
+ *
+ * Now using basic math we compute 2^n:
+ * 2^(f+n) = 2^f * 2^n
+ * 2^f - we use lookup table
+ * 2^n - can be replaced with bit shift right oeprations
+ */
+
+ f = (n >> 6) & 0xf;
+ n >>= 10;
+
+ if (n < RTE_RED_SCALING)
+ return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
+
+ return 0;
+}
+
+/**
+ * @brief Updates queue average in condition when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param [in] config pointer to a RED configuration parameter structure
+ * @param [in,out] data pointer to RED runtime data
+ * @param [in] time current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ const uint64_t time)
+{
+ uint64_t time_diff = 0, m = 0;
+
+ RTE_RED_ASSERT(red_cfg != NULL);
+ RTE_RED_ASSERT(red != NULL);
+
+ red->count ++;
+
+ /**
+ * We compute avg but we don't compare avg against
+ * min_th or max_th, nor calculate drop probability
+ */
+ time_diff = time - red->q_time;
+
+ /**
+ * m is the number of packets that might have arrived while the queue was empty.
+ * In this case we have time stamps provided by scheduler in byte units (bytes
+ * transmitted on network port). Such time stamp translates into time units as
+ * port speed is fixed but such approach simplifies the code.
+ */
+ m = time_diff / RTE_RED_S;
+
+ /**
+ * Check that m will fit into 16-bit unsigned integer
+ */
+ if (m >= RTE_RED_2POW16) {
+ red->avg = 0;
+ } else {
+ red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
+ }
+
+ return 0;
+}
+
+/**
+ * Drop probability (Sally Floyd and Van Jacobson):
+ *
+ * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
+ * pa = pb / (2 - count * pb)
+ *
+ *
+ * (1 / maxp_inv) * (avg - min_th)
+ * ---------------------------------
+ * max_th - min_th
+ * pa = -----------------------------------------------
+ * count * (1 / maxp_inv) * (avg - min_th)
+ * 2 - -----------------------------------------
+ * max_th - min_th
+ *
+ *
+ * avg - min_th
+ * pa = -----------------------------------------------------------
+ * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
+ *
+ *
+ * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. Then:
+ *
+ *
+ * avg - min_th
+ * pa = -----------------------------------
+ * pa_const - count * (avg - min_th)
+ */
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on mark probability
+ * criteria
+ *
+ * @param [in] config pointer to structure defining RED parameters
+ * @param [in,out] data pointer to RED runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
+{
+ uint32_t pa_num = 0; /* numerator of drop-probability */
+ uint32_t pa_den = 0; /* denominator of drop-probability */
+ uint32_t pa_num_count = 0;
+
+ pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
+
+ pa_num_count = red->count * pa_num;
+
+ if (red_cfg->pa_const <= pa_num_count)
+ return 1;
+
+ pa_den = red_cfg->pa_const - pa_num_count;
+
+ /* If drop, generate and save random number to be used next time */
+ if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
+ rte_red_rand_val = rte_fast_rand();
+
+ return 1;
+ }
+
+ /* No drop */
+ return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
+ *
+ * @param [in] config pointer to a RED configuration parameter structure
+ * @param [in,out] data pointer to RED runtime data
+ * @param [in] q current queue size (measured in packets)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ const unsigned q)
+{
+ RTE_RED_ASSERT(red_cfg != NULL);
+ RTE_RED_ASSERT(red != NULL);
+
+ /**
+ * EWMA filter (Sally Floyd and Van Jacobson):
+ * avg = (1 - wq) * avg + wq * q
+ * avg = avg + q * wq - avg * wq
+ *
+ * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
+ * avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
+ *
+ * By using shift left/right operations, we get:
+ * avg_s = avg_s + (q << N) - (avg_s >> n)
+ * avg_s += (q << N) - (avg_s >> n)
+ */
+
+ /* avg update */
+ red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
+
+ /* avg < min_th: do not mark the packet */
+ if (red->avg < red_cfg->min_th) {
+ red->count ++;
+ return 0;
+ }
+
+ /* min_th <= avg < max_th: mark the packet with pa probability */
+ if (red->avg < red_cfg->max_th) {
+ if (!__rte_red_drop(red_cfg, red)) {
+ red->count ++;
+ return 0;
+ }
+
+ red->count = 0;
+ return 2;
+ }
+
+ /* max_th <= avg: always mark the packet */
+ red->count = 0;
+ return 1;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data based on new queue size value.
+ * Based on new queue average and RED configuration parameters
+ * gives verdict whether to enqueue or drop the packet.
+ *
+ * @param [in] config pointer to a RED configuration parameter structure
+ * @param [in,out] data pointer to RED runtime data
+ * @param [in] q updated queue size in packets
+ * @param [in] time current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criteria
+ * @retval 2 drop the packet based on mark probability criteria
+ */
+static inline int
+rte_red_enqueue(const struct rte_red_config *red_cfg,
+ struct rte_red *red,
+ const unsigned q,
+ const uint64_t time)
+{
+ RTE_RED_ASSERT(red_cfg != NULL);
+ RTE_RED_ASSERT(red != NULL);
+
+ if (q != 0) {
+ return rte_red_enqueue_nonempty(red_cfg, red, q);
+ } else {
+ return rte_red_enqueue_empty(red_cfg, red, time);
+ }
+}
+
+/**
+ * @brief Callback to records time that queue became empty
+ *
+ * @param [in,out] data pointer to RED runtime data
+ * @param [in] time current time stamp
+ */
+static inline void
+rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
+{
+ red->q_time = time;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_RED_H_INCLUDED__ */
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
new file mode 100644
index 0000000..daa1a0d
--- /dev/null
+++ b/lib/librte_sched/rte_sched.c
@@ -0,0 +1,2129 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_mbuf.h>
+
+#include "rte_sched.h"
+#include "rte_bitmap.h"
+#include "rte_sched_common.h"
+#include "rte_approx.h"
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+#ifndef RTE_SCHED_DEBUG
+#define RTE_SCHED_DEBUG 0
+#endif
+
+#ifndef RTE_SCHED_OPTIMIZATIONS
+#define RTE_SCHED_OPTIMIZATIONS 0
+#endif
+
+#if RTE_SCHED_OPTIMIZATIONS
+#include <immintrin.h>
+#endif
+
+#define RTE_SCHED_ENQUEUE 1
+
+#define RTE_SCHED_TS 1
+
+#if RTE_SCHED_TS == 0 /* Infinite credits. Traffic shaping disabled. */
+#define RTE_SCHED_TS_CREDITS_UPDATE 0
+#define RTE_SCHED_TS_CREDITS_CHECK 0
+#else /* Real Credits. Full traffic shaping implemented. */
+#define RTE_SCHED_TS_CREDITS_UPDATE 1
+#define RTE_SCHED_TS_CREDITS_CHECK 1
+#endif
+
+#ifndef RTE_SCHED_TB_RATE_CONFIG_ERR
+#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7)
+#endif
+
+#define RTE_SCHED_WRR 1
+
+#ifndef RTE_SCHED_WRR_SHIFT
+#define RTE_SCHED_WRR_SHIFT 3
+#endif
+
+#ifndef RTE_SCHED_PORT_N_GRINDERS
+#define RTE_SCHED_PORT_N_GRINDERS 8
+#endif
+#if (RTE_SCHED_PORT_N_GRINDERS == 0) || (RTE_SCHED_PORT_N_GRINDERS & (RTE_SCHED_PORT_N_GRINDERS - 1))
+#error Number of grinders must be non-zero and a power of 2
+#endif
+#if (RTE_SCHED_OPTIMIZATIONS && (RTE_SCHED_PORT_N_GRINDERS != 8))
+#error Number of grinders must be 8 when RTE_SCHED_OPTIMIZATIONS is set
+#endif
+
+#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
+
+#define RTE_SCHED_PIPE_INVALID UINT32_MAX
+
+#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
+
+struct rte_sched_subport {
+ /* Token bucket (TB) */
+ uint64_t tb_time; /* time of last update */
+ uint32_t tb_period;
+ uint32_t tb_credits_per_period;
+ uint32_t tb_size;
+ uint32_t tb_credits;
+
+ /* Traffic classes (TCs) */
+ uint64_t tc_time; /* time of next update */
+ uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t tc_period;
+
+ /* TC oversubscription */
+ uint32_t tc_ov_period;
+ uint64_t tc_ov_time;
+ uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint8_t tc_ov_period_id;
+ uint8_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t tc_ov_n[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ double tc_ov_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+ /* Statistics */
+ struct rte_sched_subport_stats stats;
+};
+
+struct rte_sched_pipe_profile {
+ /* Token bucket (TB) */
+ uint32_t tb_period;
+ uint32_t tb_credits_per_period;
+ uint32_t tb_size;
+
+ /* Pipe traffic classes */
+ uint32_t tc_period;
+ uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+ /* Pipe queues */
+ uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
+};
+
+struct rte_sched_pipe {
+ /* Token bucket (TB) */
+ uint64_t tb_time; /* time of last update */
+ uint32_t tb_credits;
+
+ /* Pipe profile and flags */
+ uint32_t profile;
+
+ /* Traffic classes (TCs) */
+ uint64_t tc_time; /* time of next update */
+ uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+ /* Weighted Round Robin (WRR) */
+ uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
+
+ /* TC oversubscription */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ uint32_t tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint8_t tc_ov_period_id;
+#else
+ uint64_t reserved;
+#endif
+} __rte_cache_aligned;
+
+struct rte_sched_queue {
+ uint16_t qw;
+ uint16_t qr;
+};
+
+struct rte_sched_queue_extra {
+ struct rte_sched_queue_stats stats;
+#ifdef RTE_SCHED_RED
+ struct rte_red red;
+#endif
+};
+
+enum grinder_state {
+ e_GRINDER_PREFETCH_PIPE = 0,
+ e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
+ e_GRINDER_PREFETCH_MBUF,
+ e_GRINDER_READ_MBUF
+};
+
+struct rte_sched_grinder {
+ /* Pipe cache */
+ uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
+ uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
+ uint32_t pcache_w;
+ uint32_t pcache_r;
+
+ /* Current pipe */
+ enum grinder_state state;
+ uint32_t productive;
+ uint32_t pindex;
+ struct rte_sched_subport *subport;
+ struct rte_sched_pipe *pipe;
+ struct rte_sched_pipe_profile *pipe_params;
+
+ /* TC cache */
+ uint8_t tccache_qmask[4];
+ uint32_t tccache_qindex[4];
+ uint32_t tccache_w;
+ uint32_t tccache_r;
+
+ /* Current TC */
+ uint32_t tc_index;
+ struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint16_t qsize;
+ uint32_t qmask;
+ uint32_t qpos;
+ struct rte_mbuf *pkt;
+
+ double ov_coef;
+
+ uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+};
+
+struct rte_sched_port {
+ /* User parameters */
+ uint32_t n_subports_per_port;
+ uint32_t n_pipes_per_subport;
+ uint32_t rate;
+ uint32_t frame_overhead;
+ uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t n_pipe_profiles;
+#ifdef RTE_SCHED_RED
+ struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
+#endif
+
+ /* Timing */
+ uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */
+ uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */
+ uint64_t time; /* Current NIC TX time measured in bytes */
+ double cycles_per_byte; /* CPU cycles per byte */
+
+ /* Scheduling loop detection */
+ uint32_t pipe_loop;
+ uint32_t pipe_exhaustion;
+
+ /* Bitmap */
+ struct rte_bitmap bmp;
+ uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
+
+ /* Grinders */
+ struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
+ uint32_t busy_grinders;
+ struct rte_mbuf **pkts_out;
+ uint32_t n_pkts_out;
+
+ /* Queue base calculation */
+ uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
+ uint32_t qsize_sum;
+
+ /* Large data structures */
+ struct rte_sched_subport *subport;
+ struct rte_sched_pipe *pipe;
+ struct rte_sched_queue *queue;
+ struct rte_sched_queue_extra *queue_extra;
+ struct rte_sched_pipe_profile *pipe_profiles;
+ uint8_t *bmp_array;
+ struct rte_mbuf **queue_array;
+ uint8_t memory[0] __rte_cache_aligned;
+} __rte_cache_aligned;
+
+enum rte_sched_port_array {
+ e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
+ e_RTE_SCHED_PORT_ARRAY_PIPE,
+ e_RTE_SCHED_PORT_ARRAY_QUEUE,
+ e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
+ e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
+ e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
+ e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
+ e_RTE_SCHED_PORT_ARRAY_TOTAL,
+};
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline uint32_t
+rte_sched_port_queues_per_subport(struct rte_sched_port *port)
+{
+ return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport;
+}
+
+#endif
+
+static inline uint32_t
+rte_sched_port_queues_per_port(struct rte_sched_port *port)
+{
+ return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port;
+}
+
+static int
+rte_sched_port_check_params(struct rte_sched_port_params *params)
+{
+ uint32_t i, j;
+
+ if (params == NULL) {
+ return -1;
+ }
+
+ /* name */
+ if (params->name == NULL) {
+ return -2;
+ }
+
+ /* socket */
+ if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES)) {
+ return -3;
+ }
+
+ /* rate */
+ if (params->rate == 0) {
+ return -4;
+ }
+
+ /* n_subports_per_port: non-zero, power of 2 */
+ if ((params->n_subports_per_port == 0) || (!rte_is_power_of_2(params->n_subports_per_port))) {
+ return -5;
+ }
+
+ /* n_pipes_per_subport: non-zero, power of 2 */
+ if ((params->n_pipes_per_subport == 0) || (!rte_is_power_of_2(params->n_pipes_per_subport))) {
+ return -6;
+ }
+
+ /* qsize: non-zero, power of 2, no bigger than 32K (due to 16-bit read/write pointers) */
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ uint16_t qsize = params->qsize[i];
+
+ if ((qsize == 0) || (!rte_is_power_of_2(qsize))) {
+ return -7;
+ }
+ }
+
+ /* pipe_profiles and n_pipe_profiles */
+ if ((params->pipe_profiles == NULL) ||
+ (params->n_pipe_profiles == 0) ||
+ (params->n_pipe_profiles > RTE_SCHED_PIPE_PROFILES_PER_PORT)) {
+ return -8;
+ }
+
+ for (i = 0; i < params->n_pipe_profiles; i ++) {
+ struct rte_sched_pipe_params *p = params->pipe_profiles + i;
+
+ /* TB rate: non-zero, not greater than port rate */
+ if ((p->tb_rate == 0) || (p->tb_rate > params->rate)) {
+ return -9;
+ }
+
+ /* TB size: non-zero */
+ if (p->tb_size == 0) {
+ return -10;
+ }
+
+ /* TC rate: non-zero, less than pipe rate */
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+ if ((p->tc_rate[j] == 0) || (p->tc_rate[j] > p->tb_rate)) {
+ return -11;
+ }
+ }
+
+ /* TC period: non-zero */
+ if (p->tc_period == 0) {
+ return -12;
+ }
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ /* TC oversubscription weights: non-zero */
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+ if (p->tc_ov_weight[j] == 0) {
+ return -13;
+ }
+ }
+#endif
+
+ /* Queue WRR weights: non-zero */
+ for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j ++) {
+ if (p->wrr_weights[j] == 0) {
+ return -14;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t
+rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sched_port_array array)
+{
+ uint32_t n_subports_per_port = params->n_subports_per_port;
+ uint32_t n_pipes_per_subport = params->n_pipes_per_subport;
+ uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port;
+ uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port;
+
+ uint32_t size_subport = n_subports_per_port * sizeof(struct rte_sched_subport);
+ uint32_t size_pipe = n_pipes_per_port * sizeof(struct rte_sched_pipe);
+ uint32_t size_queue = n_queues_per_port * sizeof(struct rte_sched_queue);
+ uint32_t size_queue_extra = n_queues_per_port * sizeof(struct rte_sched_queue_extra);
+ uint32_t size_pipe_profiles = RTE_SCHED_PIPE_PROFILES_PER_PORT * sizeof(struct rte_sched_pipe_profile);
+ uint32_t size_bmp_array = n_queues_per_port / 8;
+ uint32_t size_per_pipe_queue_array, size_queue_array;
+
+ uint32_t base, i;
+
+ size_per_pipe_queue_array = 0;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * params->qsize[i] * sizeof(struct rte_mbuf *);
+ }
+ size_queue_array = n_pipes_per_port * size_per_pipe_queue_array;
+
+ base = 0;
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_SUBPORT) return base;
+ base += CACHE_LINE_ROUNDUP(size_subport);
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_PIPE) return base;
+ base += CACHE_LINE_ROUNDUP(size_pipe);
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE) return base;
+ base += CACHE_LINE_ROUNDUP(size_queue);
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA) return base;
+ base += CACHE_LINE_ROUNDUP(size_queue_extra);
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES) return base;
+ base += CACHE_LINE_ROUNDUP(size_pipe_profiles);
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY) return base;
+ base += CACHE_LINE_ROUNDUP(size_bmp_array);
+
+ if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY) return base;
+ base += CACHE_LINE_ROUNDUP(size_queue_array);
+
+ return base;
+}
+
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params)
+{
+ uint32_t size0, size1;
+ int status;
+
+ status = rte_sched_port_check_params(params);
+ if (status != 0) {
+ RTE_LOG(INFO, SCHED, "Port scheduler params check failed (%d)\n", status);
+
+ return 0;
+ }
+
+ size0 = sizeof(struct rte_sched_port);
+ size1 = rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_TOTAL);
+
+ return (size0 + size1);
+}
+
+static void
+rte_sched_port_config_qsize(struct rte_sched_port *port)
+{
+ /* TC 0 */
+ port->qsize_add[0] = 0;
+ port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
+ port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
+ port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
+
+ /* TC 1 */
+ port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
+ port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
+ port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
+ port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
+
+ /* TC 2 */
+ port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
+ port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
+ port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
+ port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
+
+ /* TC 3 */
+ port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
+ port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
+ port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
+ port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
+
+ port->qsize_sum = port->qsize_add[15] + port->qsize[3];
+}
+
+static void
+rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
+{
+ struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
+
+ RTE_LOG(INFO, SCHED, "Low level config for pipe profile %u:\n"
+ "\tToken bucket: period = %u, credits per period = %u, size = %u\n"
+ "\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov weights = [%hhu, %hhu, %hhu, %hhu]\n"
+ "\tWRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
+ i,
+
+ /* Token bucket */
+ p->tb_period,
+ p->tb_credits_per_period,
+ p->tb_size,
+
+ /* Traffic classes */
+ p->tc_period,
+ p->tc_credits_per_period[0],
+ p->tc_credits_per_period[1],
+ p->tc_credits_per_period[2],
+ p->tc_credits_per_period[3],
+ p->tc_ov_weight[0],
+ p->tc_ov_weight[1],
+ p->tc_ov_weight[2],
+ p->tc_ov_weight[3],
+
+ /* WRR */
+ p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
+ p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
+ p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
+ p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
+}
+
+static inline uint64_t
+rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
+{
+ uint64_t time = time_ms;
+ time = (time * rate) / 1000;
+
+ return time;
+}
+
+static void
+rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte_sched_port_params *params)
+{
+ uint32_t i, j;
+
+ for (i = 0; i < port->n_pipe_profiles; i ++) {
+ struct rte_sched_pipe_params *src = params->pipe_profiles + i;
+ struct rte_sched_pipe_profile *dst = port->pipe_profiles + i;
+
+ /* Token Bucket */
+ if (src->tb_rate == params->rate) {
+ dst->tb_credits_per_period = 1;
+ dst->tb_period = 1;
+ } else {
+ double tb_rate = ((double) src->tb_rate) / ((double) params->rate);
+ double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
+
+ rte_approx(tb_rate, d, &dst->tb_credits_per_period, &dst->tb_period);
+ }
+ dst->tb_size = src->tb_size;
+
+ /* Traffic Classes */
+ dst->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, params->rate);
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+ dst->tc_credits_per_period[j] = (uint32_t) rte_sched_time_ms_to_bytes(src->tc_period, src->tc_rate[j]);
+ }
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+ dst->tc_ov_weight[j] = src->tc_ov_weight[j];
+ }
+#endif
+
+ /* WRR */
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j ++) {
+ uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint32_t lcd, lcd1, lcd2;
+ uint32_t qindex;
+
+ qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+
+ wrr_cost[0] = src->wrr_weights[qindex];
+ wrr_cost[1] = src->wrr_weights[qindex + 1];
+ wrr_cost[2] = src->wrr_weights[qindex + 2];
+ wrr_cost[3] = src->wrr_weights[qindex + 3];
+
+ lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
+ lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
+ lcd = rte_get_lcd(lcd1, lcd2);
+
+ wrr_cost[0] = lcd / wrr_cost[0];
+ wrr_cost[1] = lcd / wrr_cost[1];
+ wrr_cost[2] = lcd / wrr_cost[2];
+ wrr_cost[3] = lcd / wrr_cost[3];
+
+ dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
+ dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
+ dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
+ dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
+ }
+
+ rte_sched_port_log_pipe_profile(port, i);
+ }
+}
+
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params)
+{
+ struct rte_sched_port *port = NULL;
+ const struct rte_memzone *mz = NULL;
+ uint32_t mem_size, i;
+
+ /* Check user parameters. Determine the amount of memory to allocate */
+ mem_size = rte_sched_port_get_memory_footprint(params);
+ if (mem_size == 0) {
+ return NULL;
+ }
+
+ /* Allocate memory to store the data structures */
+ mz = rte_memzone_lookup(params->name);
+ if (mz) {
+ /* Use existing memzone, provided that its size is big enough */
+ if (mz->len < mem_size) {
+ return NULL;
+ }
+ } else {
+ /* Create new memzone */
+ mz = rte_memzone_reserve(params->name, mem_size, params->socket, 0);
+ if (mz == NULL) {
+ return NULL;
+ }
+ }
+ memset(mz->addr, 0, mem_size);
+ port = (struct rte_sched_port *) mz->addr;
+
+ /* User parameters */
+ port->n_subports_per_port = params->n_subports_per_port;
+ port->n_pipes_per_subport = params->n_pipes_per_subport;
+ port->rate = params->rate;
+ port->frame_overhead = params->frame_overhead;
+ memcpy(port->qsize, params->qsize, sizeof(params->qsize));
+ port->n_pipe_profiles = params->n_pipe_profiles;
+
+#ifdef RTE_SCHED_RED
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ uint32_t j;
+
+ for (j = 0; j < e_RTE_METER_COLORS; j++) {
+ if (rte_red_config_init(&port->red_config[i][j],
+ params->red_params[i][j].wq_log2,
+ params->red_params[i][j].min_th,
+ params->red_params[i][j].max_th,
+ params->red_params[i][j].maxp_inv) != 0) {
+ return NULL;
+ }
+ }
+ }
+#endif
+
+ /* Timing */
+ port->time_cpu_cycles = rte_get_tsc_cycles();
+ port->time_cpu_bytes = 0;
+ port->time = 0;
+ port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) params->rate);
+
+ /* Scheduling loop detection */
+ port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+ port->pipe_exhaustion = 0;
+
+ /* Grinders */
+ port->busy_grinders = 0;
+ port->pkts_out = NULL;
+ port->n_pkts_out = 0;
+
+ /* Queue base calculation */
+ rte_sched_port_config_qsize(port);
+
+ /* Large data structures */
+ port->subport = (struct rte_sched_subport *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_SUBPORT));
+ port->pipe = (struct rte_sched_pipe *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE));
+ port->queue = (struct rte_sched_queue *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE));
+ port->queue_extra = (struct rte_sched_queue_extra *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA));
+ port->pipe_profiles = (struct rte_sched_pipe_profile *) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES));
+ port->bmp_array = port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY);
+ port->queue_array = (struct rte_mbuf **) (port->memory + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY));
+
+ /* Pipe profile table */
+ rte_sched_port_config_pipe_profile_table(port, params);
+
+ /* Bitmap */
+ if (rte_bitmap_init(&port->bmp, port->bmp_array, rte_sched_port_queues_per_port(port)) != 0) {
+ RTE_LOG(INFO, SCHED, "Bitmap init error\n");
+ return NULL;
+ }
+ for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) {
+ port->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
+ }
+
+ return port;
+}
+
+void
+rte_sched_port_free(struct rte_sched_port *port)
+{
+ /* Check user parameters */
+ if (port == NULL){
+ return;
+ }
+ rte_bitmap_free(&port->bmp);
+
+ return;
+}
+
+static void
+rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
+{
+ struct rte_sched_subport *s = port->subport + i;
+
+ RTE_LOG(INFO, SCHED, "Low level config for subport %u:\n"
+ "\tToken bucket: period = %u, credits per period = %u, size = %u\n"
+ "\tTraffic classes: period = %u, credits per period = [%u, %u, %u, %u], ov period = %u\n",
+ i,
+
+ /* Token bucket */
+ s->tb_period,
+ s->tb_credits_per_period,
+ s->tb_size,
+
+ /* Traffic classes */
+ s->tc_period,
+ s->tc_credits_per_period[0],
+ s->tc_credits_per_period[1],
+ s->tc_credits_per_period[2],
+ s->tc_credits_per_period[3],
+ s->tc_ov_period);
+}
+
+int
+rte_sched_subport_config(struct rte_sched_port *port,
+ uint32_t subport_id,
+ struct rte_sched_subport_params *params)
+{
+ struct rte_sched_subport *s;
+ uint32_t i;
+
+ /* Check user parameters */
+ if ((port == NULL) ||
+ (subport_id >= port->n_subports_per_port) ||
+ (params == NULL)) {
+ return -1;
+ }
+
+ if ((params->tb_rate == 0) || (params->tb_rate > port->rate)) {
+ return -2;
+ }
+
+ if (params->tb_size == 0) {
+ return -3;
+ }
+
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ if ((params->tc_rate[i] == 0) || (params->tc_rate[i] > params->tb_rate)) {
+ return -4;
+ }
+ }
+
+ if (params->tc_period == 0) {
+ return -5;
+ }
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ if ((params->tc_ov_period == 0) || (params->tc_ov_period > params->tc_period)) {
+ return -6;
+ }
+#endif
+
+ s = port->subport + subport_id;
+
+ /* Token Bucket (TB) */
+ if (params->tb_rate == port->rate) {
+ s->tb_credits_per_period = 1;
+ s->tb_period = 1;
+ } else {
+ double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
+ double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
+
+ rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
+ }
+ s->tb_size = params->tb_size;
+ s->tb_time = port->time;
+ s->tb_credits = s->tb_size / 2;
+
+ /* Traffic Classes (TCs) */
+ s->tc_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ s->tc_credits_per_period[i] = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_period, params->tc_rate[i]);
+ }
+ s->tc_time = port->time + s->tc_period;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ s->tc_credits[i] = s->tc_credits_per_period[i];
+ }
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ /* TC oversubscription */
+ s->tc_ov_period = (uint32_t) rte_sched_time_ms_to_bytes(params->tc_ov_period, port->rate);
+ s->tc_ov_time = port->time + s->tc_ov_period;
+ s->tc_ov_period_id = 0;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ s->tc_ov[i] = 0;
+ s->tc_ov_n[i] = 0;
+ s->tc_ov_rate[i] = 0;
+ s->tc_ov_credits[i] = 0;
+ }
+#endif
+
+ rte_sched_port_log_subport_config(port, subport_id);
+
+ return 0;
+}
+
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+ uint32_t subport_id,
+ uint32_t pipe_id,
+ int32_t pipe_profile)
+{
+ struct rte_sched_subport *s;
+ struct rte_sched_pipe *p;
+ struct rte_sched_pipe_profile *params;
+ uint32_t deactivate, profile, i;
+
+ /* Check user parameters */
+ profile = (uint32_t) pipe_profile;
+ deactivate = (pipe_profile < 0);
+ if ((port == NULL) ||
+ (subport_id >= port->n_subports_per_port) ||
+ (pipe_id >= port->n_pipes_per_subport) ||
+ ((!deactivate) && (profile >= port->n_pipe_profiles))) {
+ return -1;
+ }
+
+ /* Check that subport configuration is valid */
+ s = port->subport + subport_id;
+ if (s->tb_period == 0) {
+ return -2;
+ }
+
+ p = port->pipe + (subport_id * port->n_pipes_per_subport + pipe_id);
+
+ /* Handle the case when pipe already has a valid configuration */
+ if (p->tb_time) {
+ params = port->pipe_profiles + p->profile;
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ /* Unplug pipe from its subport */
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ s->tc_ov_n[i] -= params->tc_ov_weight[i];
+ s->tc_ov_rate[i] -= ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+ s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+ }
+#endif
+
+ /* Reset the pipe */
+ memset(p, 0, sizeof(struct rte_sched_pipe));
+ }
+
+ if (deactivate) {
+ return 0;
+ }
+
+ /* Apply the new pipe configuration */
+ p->profile = profile;
+ params = port->pipe_profiles + p->profile;
+
+ /* Token Bucket (TB) */
+ p->tb_time = port->time;
+ p->tb_credits = params->tb_size / 2;
+
+ /* Traffic Classes (TCs) */
+ p->tc_time = port->time + params->tc_period;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ p->tc_credits[i] = params->tc_credits_per_period[i];
+ }
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ /* Subport TC oversubscription */
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ s->tc_ov_n[i] += params->tc_ov_weight[i];
+ s->tc_ov_rate[i] += ((double) params->tc_credits_per_period[i]) / ((double) params->tc_period);
+ s->tc_ov[i] = s->tc_ov_rate[i] > (((double) s->tc_credits_per_period[i]) / ((double) s->tc_period));
+ }
+ p->tc_ov_period_id = s->tc_ov_period_id;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ p->tc_ov_credits[i] = 0;
+ }
+#endif
+
+ return 0;
+}
+
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+ uint32_t subport_id,
+ struct rte_sched_subport_stats *stats,
+ uint32_t *tc_ov)
+{
+ struct rte_sched_subport *s;
+ uint32_t mask, i;
+
+ /* Check user parameters */
+ if ((port == NULL) ||
+ (subport_id >= port->n_subports_per_port) ||
+ (stats == NULL) ||
+ (tc_ov == NULL)) {
+ return -1;
+ }
+ s = port->subport + subport_id;
+
+ /* Copy subport stats and clear */
+ memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
+ memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
+
+ /* Subport TC ovesubscription status */
+ mask = 0;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i ++) {
+ mask |= ((uint32_t) s->tc_ov[i]) << i;
+ }
+ *tc_ov = mask;
+
+ return 0;
+}
+
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+ uint32_t queue_id,
+ struct rte_sched_queue_stats *stats,
+ uint16_t *qlen)
+{
+ struct rte_sched_queue *q;
+ struct rte_sched_queue_extra *qe;
+
+ /* Check user parameters */
+ if ((port == NULL) ||
+ (queue_id >= rte_sched_port_queues_per_port(port)) ||
+ (stats == NULL) ||
+ (qlen == NULL)) {
+ return -1;
+ }
+ q = port->queue + queue_id;
+ qe = port->queue_extra + queue_id;
+
+ /* Copy queue stats and clear */
+ memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
+ memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
+
+ /* Queue length */
+ *qlen = q->qw - q->qr;
+
+ return 0;
+}
+
+static inline uint32_t
+rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue)
+{
+ uint32_t result;
+
+ result = subport * port->n_pipes_per_subport + pipe;
+ result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + traffic_class;
+ result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+
+ return result;
+}
+
+static inline struct rte_mbuf **
+rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
+{
+ uint32_t pindex = qindex >> 4;
+ uint32_t qpos = qindex & 0xF;
+
+ return (port->queue_array + pindex * port->qsize_sum + port->qsize_add[qpos]);
+}
+
+static inline uint16_t
+rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
+{
+ uint32_t tc = (qindex >> 2) & 0x3;
+
+ return port->qsize[tc];
+}
+
+#if RTE_SCHED_DEBUG
+
+static inline int
+rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex)
+{
+ struct rte_sched_queue *queue = port->queue + qindex;
+
+ return (queue->qr == queue->qw);
+}
+
+static inline int
+rte_sched_port_queue_is_full(struct rte_sched_port *port, uint32_t qindex)
+{
+ struct rte_sched_queue *queue = port->queue + qindex;
+ uint16_t qsize = rte_sched_port_qsize(port, qindex);
+ uint16_t qlen = q->qw - q->qr;
+
+ return (qlen >= qsize);
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline void
+rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+ struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+ uint32_t tc_index = (qindex >> 2) & 0x3;
+ uint32_t pkt_len = pkt->pkt.pkt_len;
+
+ s->stats.n_pkts_tc[tc_index] += 1;
+ s->stats.n_bytes_tc[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+ struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+ uint32_t tc_index = (qindex >> 2) & 0x3;
+ uint32_t pkt_len = pkt->pkt.pkt_len;
+
+ s->stats.n_pkts_tc_dropped[tc_index] += 1;
+ s->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+ struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+ uint32_t pkt_len = pkt->pkt.pkt_len;
+
+ qe->stats.n_pkts += 1;
+ qe->stats.n_bytes += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+ struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+ uint32_t pkt_len = pkt->pkt.pkt_len;
+
+ qe->stats.n_pkts_dropped += 1;
+ qe->stats.n_bytes_dropped += pkt_len;
+}
+
+#endif /* RTE_SCHED_COLLECT_STATS */
+
+#ifdef RTE_SCHED_RED
+
+static inline int
+rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen)
+{
+ struct rte_sched_queue_extra *qe;
+ struct rte_red_config *red_cfg;
+ struct rte_red *red;
+ uint32_t tc_index;
+ enum rte_meter_color color;
+
+ tc_index = (qindex >> 2) & 0x3;
+ color = rte_sched_port_pkt_read_color(pkt);
+ red_cfg = &port->red_config[tc_index][color];
+
+ qe = port->queue_extra + qindex;
+ red = &qe->red;
+
+ return rte_red_enqueue(red_cfg, red, qlen, port->time);
+}
+
+static inline void
+rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex)
+{
+ struct rte_sched_queue_extra *qe;
+ struct rte_red *red;
+
+ qe = port->queue_extra + qindex;
+ red = &qe->red;
+
+ rte_red_mark_queue_empty(red, port->time);
+}
+
+#else
+
+#define rte_sched_port_red_drop(port, pkt, qindex, qlen) 0
+
+#define rte_sched_port_set_queue_empty_timestamp(port, qindex)
+
+#endif /* RTE_SCHED_RED */
+
+#if RTE_SCHED_DEBUG
+
+static inline int
+debug_pipe_is_empty(struct rte_sched_port *port, uint32_t pindex)
+{
+ uint32_t qindex, i;
+
+ qindex = pindex << 4;
+
+ for (i = 0; i < 16; i ++){
+ uint32_t queue_empty = rte_sched_port_queue_is_empty(port, qindex + i);
+ uint32_t bmp_bit_clear = (rte_bitmap_get(&port->bmp, qindex + i) == 0);
+
+ if (queue_empty != bmp_bit_clear){
+ rte_panic("Queue status mismatch for queue %u of pipe %u\n", i, pindex);
+ }
+
+ if (!queue_empty){
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static inline void
+debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, uint64_t bmp_slab)
+{
+ uint64_t mask;
+ uint32_t i, panic;
+
+ if (bmp_slab == 0){
+ rte_panic("Empty slab at position %u\n", bmp_pos);
+ }
+
+ panic = 0;
+ for (i = 0, mask = 1; i < 64; i ++, mask <<= 1) {
+ if (mask & bmp_slab){
+ if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) {
+ printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
+ panic = 1;
+ }
+ }
+ }
+
+ if (panic){
+ rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n",
+ bmp_slab, bmp_pos);
+ }
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+static inline uint32_t
+rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_port *port, struct rte_mbuf *pkt)
+{
+ struct rte_sched_queue *q;
+#ifdef RTE_SCHED_COLLECT_STATS
+ struct rte_sched_queue_extra *qe;
+#endif
+ uint32_t subport, pipe, traffic_class, queue, qindex;
+
+ rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue);
+
+ qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
+ q = port->queue + qindex;
+ rte_prefetch0(q);
+#ifdef RTE_SCHED_COLLECT_STATS
+ qe = port->queue_extra + qindex;
+ rte_prefetch0(qe);
+#endif
+
+ return qindex;
+}
+
+static inline void
+rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase)
+{
+ struct rte_sched_queue *q;
+ struct rte_mbuf **q_qw;
+ uint16_t qsize;
+
+ q = port->queue + qindex;
+ qsize = rte_sched_port_qsize(port, qindex);
+ q_qw = qbase + (q->qw & (qsize - 1));
+
+ rte_prefetch0(q_qw);
+ rte_bitmap_prefetch0(&port->bmp, qindex);
+}
+
+static inline int
+rte_sched_port_enqueue_qwa(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf **qbase, struct rte_mbuf *pkt)
+{
+ struct rte_sched_queue *q;
+ uint16_t qsize;
+ uint16_t qlen;
+
+ q = port->queue + qindex;
+ qsize = rte_sched_port_qsize(port, qindex);
+ qlen = q->qw - q->qr;
+
+ /* Drop the packet (and update drop stats) when queue is full */
+ if (unlikely(rte_sched_port_red_drop(port, pkt, qindex, qlen) || (qlen >= qsize))) {
+ rte_pktmbuf_free(pkt);
+#ifdef RTE_SCHED_COLLECT_STATS
+ rte_sched_port_update_subport_stats_on_drop(port, qindex, pkt);
+ rte_sched_port_update_queue_stats_on_drop(port, qindex, pkt);
+#endif
+ return 0;
+ }
+
+ /* Enqueue packet */
+ qbase[q->qw & (qsize - 1)] = pkt;
+ q->qw ++;
+
+ /* Activate queue in the port bitmap */
+ rte_bitmap_set(&port->bmp, qindex);
+
+ /* Statistics */
+#ifdef RTE_SCHED_COLLECT_STATS
+ rte_sched_port_update_subport_stats(port, qindex, pkt);
+ rte_sched_port_update_queue_stats(port, qindex, pkt);
+#endif
+
+ return 1;
+}
+
+#if RTE_SCHED_ENQUEUE == 0
+
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+ uint32_t result, i;
+
+ result = 0;
+
+ for (i = 0; i < n_pkts; i ++) {
+ struct rte_mbuf *pkt;
+ struct rte_mbuf **q_base;
+ uint32_t subport, pipe, traffic_class, queue, qindex;
+
+ pkt = pkts[i];
+
+ rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue);
+
+ qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
+
+ q_base = rte_sched_port_qbase(port, qindex);
+
+ result += rte_sched_port_enqueue_qwa(port, qindex, q_base, pkt);
+ }
+
+ return result;
+}
+
+#else
+
+/* The enqueue function implements a 4-level pipeline with each stage processing
+ * two different packets. The purpose of using a pipeline is to hide the latency
+ * of prefetching the data structures. The naming convention is presented in the
+ * diagram below:
+ *
+ * p00 _______ p10 _______ p20 _______ p30 _______
+ * ----->| |----->| |----->| |----->| |----->
+ * | 0 | | 1 | | 2 | | 3 |
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ * p01 p11 p21 p31
+ *
+ ***/
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+ struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21, *pkt30, *pkt31, *pkt_last;
+ struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base, **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
+ uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
+ uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
+ uint32_t result, i;
+
+ result = 0;
+
+ /* Less then 6 input packets available, which is not enough to feed the pipeline */
+ if (unlikely(n_pkts < 6)) {
+ struct rte_mbuf **q_base[5];
+ uint32_t q[5];
+
+ /* Prefetch the mbuf structure of each packet */
+ for (i = 0; i < n_pkts; i ++) {
+ rte_prefetch0(pkts[i]);
+ }
+
+ /* Prefetch the queue structure for each queue */
+ for (i = 0; i < n_pkts; i ++) {
+ q[i] = rte_sched_port_enqueue_qptrs_prefetch0(port, pkts[i]);
+ }
+
+ /* Prefetch the write pointer location of each queue */
+ for (i = 0; i < n_pkts; i ++) {
+ q_base[i] = rte_sched_port_qbase(port, q[i]);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q[i], q_base[i]);
+ }
+
+ /* Write each packet to its queue */
+ for (i = 0; i < n_pkts; i ++) {
+ result += rte_sched_port_enqueue_qwa(port, q[i], q_base[i], pkts[i]);
+ }
+
+ return result;
+ }
+
+ /* Feed the first 3 stages of the pipeline (6 packets needed) */
+ pkt20 = pkts[0];
+ pkt21 = pkts[1];
+ rte_prefetch0(pkt20);
+ rte_prefetch0(pkt21);
+
+ pkt10 = pkts[2];
+ pkt11 = pkts[3];
+ rte_prefetch0(pkt10);
+ rte_prefetch0(pkt11);
+
+ q20 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt20);
+ q21 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt21);
+
+ pkt00 = pkts[4];
+ pkt01 = pkts[5];
+ rte_prefetch0(pkt00);
+ rte_prefetch0(pkt01);
+
+ q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+ q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+ q20_base = rte_sched_port_qbase(port, q20);
+ q21_base = rte_sched_port_qbase(port, q21);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+ /* Run the pipeline */
+ for (i = 6; i < (n_pkts & (~1)); i += 2) {
+ /* Propagate stage inputs */
+ pkt30 = pkt20;
+ pkt31 = pkt21;
+ pkt20 = pkt10;
+ pkt21 = pkt11;
+ pkt10 = pkt00;
+ pkt11 = pkt01;
+ q30 = q20;
+ q31 = q21;
+ q20 = q10;
+ q21 = q11;
+ q30_base = q20_base;
+ q31_base = q21_base;
+
+ /* Stage 0: Get packets in */
+ pkt00 = pkts[i];
+ pkt01 = pkts[i + 1];
+ rte_prefetch0(pkt00);
+ rte_prefetch0(pkt01);
+
+ /* Stage 1: Prefetch queue structure storing queue pointers */
+ q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+ q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+ /* Stage 2: Prefetch queue write location */
+ q20_base = rte_sched_port_qbase(port, q20);
+ q21_base = rte_sched_port_qbase(port, q21);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+ /* Stage 3: Write packet to queue and activate queue */
+ r30 = rte_sched_port_enqueue_qwa(port, q30, q30_base, pkt30);
+ r31 = rte_sched_port_enqueue_qwa(port, q31, q31_base, pkt31);
+ result += r30 + r31;
+ }
+
+ /* Drain the pipeline (exactly 6 packets). Handle the last packet in the case
+ of an odd number of input packets. */
+ pkt_last = pkts[n_pkts - 1];
+ rte_prefetch0(pkt_last);
+
+ q00 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt00);
+ q01 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt01);
+
+ q10_base = rte_sched_port_qbase(port, q10);
+ q11_base = rte_sched_port_qbase(port, q11);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q10, q10_base);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q11, q11_base);
+
+ r20 = rte_sched_port_enqueue_qwa(port, q20, q20_base, pkt20);
+ r21 = rte_sched_port_enqueue_qwa(port, q21, q21_base, pkt21);
+ result += r20 + r21;
+
+ q_last = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt_last);
+
+ q00_base = rte_sched_port_qbase(port, q00);
+ q01_base = rte_sched_port_qbase(port, q01);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q00, q00_base);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q01, q01_base);
+
+ r10 = rte_sched_port_enqueue_qwa(port, q10, q10_base, pkt10);
+ r11 = rte_sched_port_enqueue_qwa(port, q11, q11_base, pkt11);
+ result += r10 + r11;
+
+ q_last_base = rte_sched_port_qbase(port, q_last);
+ rte_sched_port_enqueue_qwa_prefetch0(port, q_last, q_last_base);
+
+ r00 = rte_sched_port_enqueue_qwa(port, q00, q00_base, pkt00);
+ r01 = rte_sched_port_enqueue_qwa(port, q01, q01_base, pkt01);
+ result += r00 + r01;
+
+ if (n_pkts & 1) {
+ r_last = rte_sched_port_enqueue_qwa(port, q_last, q_last_base, pkt_last);
+ result += r_last;
+ }
+
+ return result;
+}
+
+#endif /* RTE_SCHED_ENQUEUE */
+
+#if RTE_SCHED_TS_CREDITS_UPDATE == 0
+
+#define grinder_credits_update(port, pos)
+
+#elif !defined(RTE_SCHED_SUBPORT_TC_OV)
+
+static inline void
+grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_subport *subport = grinder->subport;
+ struct rte_sched_pipe *pipe = grinder->pipe;
+ struct rte_sched_pipe_profile *params = grinder->pipe_params;
+ uint64_t n_periods;
+
+ /* Subport TB */
+ n_periods = (port->time - subport->tb_time) / subport->tb_period;
+ subport->tb_credits += n_periods * subport->tb_credits_per_period;
+ subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+ subport->tb_time += n_periods * subport->tb_period;
+
+ /* Pipe TB */
+ n_periods = (port->time - pipe->tb_time) / params->tb_period;
+ pipe->tb_credits += n_periods * params->tb_credits_per_period;
+ pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+ pipe->tb_time += n_periods * params->tb_period;
+
+ /* Subport TCs */
+ if (unlikely(port->time >= subport->tc_time)) {
+ subport->tc_credits[0] = subport->tc_credits_per_period[0];
+ subport->tc_credits[1] = subport->tc_credits_per_period[1];
+ subport->tc_credits[2] = subport->tc_credits_per_period[2];
+ subport->tc_credits[3] = subport->tc_credits_per_period[3];
+ subport->tc_time = port->time + subport->tc_period;
+ }
+
+ /* Pipe TCs */
+ if (unlikely(port->time >= pipe->tc_time)) {
+ pipe->tc_credits[0] = params->tc_credits_per_period[0];
+ pipe->tc_credits[1] = params->tc_credits_per_period[1];
+ pipe->tc_credits[2] = params->tc_credits_per_period[2];
+ pipe->tc_credits[3] = params->tc_credits_per_period[3];
+ pipe->tc_time = port->time + params->tc_period;
+ }
+}
+
+#else
+
+static inline void
+grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_subport *subport = grinder->subport;
+ struct rte_sched_pipe *pipe = grinder->pipe;
+ struct rte_sched_pipe_profile *params = grinder->pipe_params;
+ uint64_t n_periods;
+
+ /* Subport TB */
+ n_periods = (port->time - subport->tb_time) / subport->tb_period;
+ subport->tb_credits += n_periods * subport->tb_credits_per_period;
+ subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+ subport->tb_time += n_periods * subport->tb_period;
+
+ /* Pipe TB */
+ n_periods = (port->time - pipe->tb_time) / params->tb_period;
+ pipe->tb_credits += n_periods * params->tb_credits_per_period;
+ pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+ pipe->tb_time += n_periods * params->tb_period;
+
+ /* Subport TCs */
+ if (unlikely(port->time >= subport->tc_ov_time)) {
+ uint64_t n_ov_periods;
+
+ if (unlikely(port->time >= subport->tc_time)) {
+ subport->tc_credits[0] = subport->tc_credits_per_period[0];
+ subport->tc_credits[1] = subport->tc_credits_per_period[1];
+ subport->tc_credits[2] = subport->tc_credits_per_period[2];
+ subport->tc_credits[3] = subport->tc_credits_per_period[3];
+
+ subport->tc_time = port->time + subport->tc_period;
+ }
+
+ n_ov_periods = (subport->tc_time - port->time + subport->tc_ov_period - 1) / subport->tc_ov_period;
+
+ subport->tc_ov_credits[0] = subport->tc_credits[0] / (n_ov_periods * subport->tc_ov_n[0]);
+ subport->tc_ov_credits[1] = subport->tc_credits[1] / (n_ov_periods * subport->tc_ov_n[1]);
+ subport->tc_ov_credits[2] = subport->tc_credits[2] / (n_ov_periods * subport->tc_ov_n[2]);
+ subport->tc_ov_credits[3] = subport->tc_credits[3] / (n_ov_periods * subport->tc_ov_n[3]);
+
+ subport->tc_ov_time = port->time + subport->tc_ov_period;
+ subport->tc_ov_period_id ++;
+ }
+
+ /* Pipe TCs */
+ if (unlikely(port->time >= pipe->tc_time)) {
+ pipe->tc_credits[0] = params->tc_credits_per_period[0];
+ pipe->tc_credits[1] = params->tc_credits_per_period[1];
+ pipe->tc_credits[2] = params->tc_credits_per_period[2];
+ pipe->tc_credits[3] = params->tc_credits_per_period[3];
+ pipe->tc_time = port->time + params->tc_period;
+ }
+ if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
+ uint32_t pipe_tc_ov_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t tc_mask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t mask[] = {UINT32_MAX, 0};
+
+ tc_mask[0] = mask[subport->tc_ov[0]];
+ tc_mask[1] = mask[subport->tc_ov[1]];
+ tc_mask[2] = mask[subport->tc_ov[2]];
+ tc_mask[3] = mask[subport->tc_ov[3]];
+
+ pipe_tc_ov_credits[0] = subport->tc_ov_credits[0] * params->tc_ov_weight[0];
+ pipe_tc_ov_credits[1] = subport->tc_ov_credits[1] * params->tc_ov_weight[1];
+ pipe_tc_ov_credits[2] = subport->tc_ov_credits[2] * params->tc_ov_weight[2];
+ pipe_tc_ov_credits[3] = subport->tc_ov_credits[3] * params->tc_ov_weight[3];
+
+ pipe->tc_ov_credits[0] = (tc_mask[0] & pipe->tc_credits[0]) | ((~ tc_mask[0]) & pipe_tc_ov_credits[0]);
+ pipe->tc_ov_credits[1] = (tc_mask[1] & pipe->tc_credits[1]) | ((~ tc_mask[1]) & pipe_tc_ov_credits[1]);
+ pipe->tc_ov_credits[2] = (tc_mask[2] & pipe->tc_credits[2]) | ((~ tc_mask[2]) & pipe_tc_ov_credits[2]);
+ pipe->tc_ov_credits[3] = (tc_mask[3] & pipe->tc_credits[3]) | ((~ tc_mask[3]) & pipe_tc_ov_credits[3]);
+
+ pipe->tc_ov_period_id = subport->tc_ov_period_id;
+ }
+}
+
+#endif /* RTE_SCHED_TS_CREDITS_UPDATE, RTE_SCHED_SUBPORT_TC_OV */
+
+#ifndef RTE_SCHED_SUBPORT_TC_OV
+
+static inline int
+grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_subport *subport = grinder->subport;
+ struct rte_sched_pipe *pipe = grinder->pipe;
+ struct rte_mbuf *pkt = grinder->pkt;
+ uint32_t tc_index = grinder->tc_index;
+ uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead;
+ int enough_credits;
+
+ /* Check queue credits */
+ enough_credits = (pkt_len <= subport->tb_credits) &&
+ (pkt_len <= subport->tc_credits[tc_index]) &&
+ (pkt_len <= pipe->tb_credits) &&
+ (pkt_len <= pipe->tc_credits[tc_index]);
+
+ if (!enough_credits) {
+ return 0;
+ }
+
+ /* Update port credits */
+ subport->tb_credits -= pkt_len;
+ subport->tc_credits[tc_index] -= pkt_len;
+ pipe->tb_credits -= pkt_len;
+ pipe->tc_credits[tc_index] -= pkt_len;
+
+ return 1;
+}
+
+#else
+
+static inline int
+grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_subport *subport = grinder->subport;
+ struct rte_sched_pipe *pipe = grinder->pipe;
+ struct rte_mbuf *pkt = grinder->pkt;
+ uint32_t tc_index = grinder->tc_index;
+ uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead;
+ uint32_t subport_tb_credits = subport->tb_credits;
+ uint32_t subport_tc_credits = subport->tc_credits[tc_index];
+ uint32_t pipe_tb_credits = pipe->tb_credits;
+ uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
+ uint32_t pipe_tc_ov_credits = pipe->tc_ov_credits[tc_index];
+ int enough_credits;
+
+ /* Check pipe and subport credits */
+ enough_credits = (pkt_len <= subport_tb_credits) &&
+ (pkt_len <= subport_tc_credits) &&
+ (pkt_len <= pipe_tb_credits) &&
+ (pkt_len <= pipe_tc_credits) &&
+ (pkt_len <= pipe_tc_ov_credits);
+
+ if (!enough_credits) {
+ return 0;
+ }
+
+ /* Update pipe and subport credits */
+ subport->tb_credits -= pkt_len;
+ subport->tc_credits[tc_index] -= pkt_len;
+ pipe->tb_credits -= pkt_len;
+ pipe->tc_credits[tc_index] -= pkt_len;
+ pipe->tc_ov_credits[tc_index] -= pkt_len;
+
+ return 1;
+}
+
+#endif /* RTE_SCHED_SUBPORT_TC_OV */
+
+static inline int
+grinder_schedule(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+ struct rte_mbuf *pkt = grinder->pkt;
+ uint32_t pkt_len = pkt->pkt.pkt_len + port->frame_overhead;
+
+#if RTE_SCHED_TS_CREDITS_CHECK
+ if (!grinder_credits_check(port, pos)) {
+ return 0;
+ }
+#endif
+
+ /* Advance port time */
+ port->time += pkt_len;
+
+ /* Send packet */
+ port->pkts_out[port->n_pkts_out ++] = pkt;
+ queue->qr ++;
+ grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos];
+ if (queue->qr == queue->qw) {
+ uint32_t qindex = grinder->qindex[grinder->qpos];
+
+ rte_bitmap_clear(&port->bmp, qindex);
+ grinder->qmask &= ~(1 << grinder->qpos);
+ grinder->wrr_mask[grinder->qpos] = 0;
+ rte_sched_port_set_queue_empty_timestamp(port, qindex);
+ }
+
+ /* Reset pipe loop detection */
+ port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+ grinder->productive = 1;
+
+ return 1;
+}
+
+#if RTE_SCHED_OPTIMIZATIONS
+
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+ __m128i index = _mm_set1_epi32 (base_pipe);
+ __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos);
+ __m128i res = _mm_cmpeq_epi32(pipes, index);
+ pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4));
+ pipes = _mm_cmpeq_epi32(pipes, index);
+ res = _mm_or_si128(res, pipes);
+
+ if (_mm_testz_si128(res, res))
+ return 0;
+
+ return 1;
+}
+
+#else
+
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+ uint32_t i;
+
+ for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i ++) {
+ if (port->grinder_base_bmp_pos[i] == base_pipe) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#endif /* RTE_SCHED_OPTIMIZATIONS */
+
+static inline void
+grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint16_t w[4];
+
+ grinder->pcache_w = 0;
+ grinder->pcache_r = 0;
+
+ w[0] = (uint16_t) bmp_slab;
+ w[1] = (uint16_t) (bmp_slab >> 16);
+ w[2] = (uint16_t) (bmp_slab >> 32);
+ w[3] = (uint16_t) (bmp_slab >> 48);
+
+ grinder->pcache_qmask[grinder->pcache_w] = w[0];
+ grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
+ grinder->pcache_w += (w[0] != 0);
+
+ grinder->pcache_qmask[grinder->pcache_w] = w[1];
+ grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
+ grinder->pcache_w += (w[1] != 0);
+
+ grinder->pcache_qmask[grinder->pcache_w] = w[2];
+ grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
+ grinder->pcache_w += (w[2] != 0);
+
+ grinder->pcache_qmask[grinder->pcache_w] = w[3];
+ grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
+ grinder->pcache_w += (w[3] != 0);
+}
+
+static inline void
+grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint8_t b[4];
+
+ grinder->tccache_w = 0;
+ grinder->tccache_r = 0;
+
+ b[0] = (uint8_t) (qmask & 0xF);
+ b[1] = (uint8_t) ((qmask >> 4) & 0xF);
+ b[2] = (uint8_t) ((qmask >> 8) & 0xF);
+ b[3] = (uint8_t) ((qmask >> 12) & 0xF);
+
+ grinder->tccache_qmask[grinder->tccache_w] = b[0];
+ grinder->tccache_qindex[grinder->tccache_w] = qindex;
+ grinder->tccache_w += (b[0] != 0);
+
+ grinder->tccache_qmask[grinder->tccache_w] = b[1];
+ grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
+ grinder->tccache_w += (b[1] != 0);
+
+ grinder->tccache_qmask[grinder->tccache_w] = b[2];
+ grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
+ grinder->tccache_w += (b[2] != 0);
+
+ grinder->tccache_qmask[grinder->tccache_w] = b[3];
+ grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
+ grinder->tccache_w += (b[3] != 0);
+}
+
+static inline int
+grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_mbuf **qbase;
+ uint32_t qindex;
+ uint16_t qsize;
+
+ if (grinder->tccache_r == grinder->tccache_w) {
+ return 0;
+ }
+
+ qindex = grinder->tccache_qindex[grinder->tccache_r];
+ qbase = rte_sched_port_qbase(port, qindex);
+ qsize = rte_sched_port_qsize(port, qindex);
+
+ grinder->tc_index = (qindex >> 2) & 0x3;
+ grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
+ grinder->qsize = qsize;
+
+ grinder->qindex[0] = qindex;
+ grinder->qindex[1] = qindex + 1;
+ grinder->qindex[2] = qindex + 2;
+ grinder->qindex[3] = qindex + 3;
+
+ grinder->queue[0] = port->queue + qindex;
+ grinder->queue[1] = port->queue + qindex + 1;
+ grinder->queue[2] = port->queue + qindex + 2;
+ grinder->queue[3] = port->queue + qindex + 3;
+
+ grinder->qbase[0] = qbase;
+ grinder->qbase[1] = qbase + qsize;
+ grinder->qbase[2] = qbase + 2 * qsize;
+ grinder->qbase[3] = qbase + 3 * qsize;
+
+ grinder->tccache_r ++;
+ return 1;
+}
+
+static inline int
+grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint32_t pipe_qindex;
+ uint16_t pipe_qmask;
+
+ if (grinder->pcache_r < grinder->pcache_w) {
+ pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
+ pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
+ grinder->pcache_r ++;
+ } else {
+ uint64_t bmp_slab = 0;
+ uint32_t bmp_pos = 0;
+
+ /* Get another non-empty pipe group */
+ if (unlikely(rte_bitmap_scan(&port->bmp, &bmp_pos, &bmp_slab) <= 0)) {
+ return 0;
+ }
+
+#if RTE_SCHED_DEBUG
+ debug_check_queue_slab(port, bmp_pos, bmp_slab);
+#endif
+
+ /* Return if pipe group already in one of the other grinders */
+ port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
+ if (unlikely(grinder_pipe_exists(port, bmp_pos))) {
+ return 0;
+ }
+ port->grinder_base_bmp_pos[pos] = bmp_pos;
+
+ /* Install new pipe group into grinder's pipe cache */
+ grinder_pcache_populate(port, pos, bmp_pos, bmp_slab);
+
+ pipe_qmask = grinder->pcache_qmask[0];
+ pipe_qindex = grinder->pcache_qindex[0];
+ grinder->pcache_r = 1;
+ }
+
+ /* Install new pipe in the grinder */
+ grinder->pindex = pipe_qindex >> 4;
+ grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
+ grinder->pipe = port->pipe + grinder->pindex;
+ grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
+ grinder->productive = 0;
+
+ grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask);
+ grinder_next_tc(port, pos);
+
+ /* Check for pipe exhaustion */
+ if (grinder->pindex == port->pipe_loop) {
+ port->pipe_exhaustion = 1;
+ port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+ }
+
+ return 1;
+}
+
+#if RTE_SCHED_WRR == 0
+
+#define grinder_wrr_load(a,b)
+
+#define grinder_wrr_store(a,b)
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint64_t slab = grinder->qmask;
+
+ if (rte_bsf64(slab, &grinder->qpos) == 0) {
+ rte_panic("grinder wrr\n");
+ }
+}
+
+#elif RTE_SCHED_WRR == 1
+
+static inline void
+grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_pipe *pipe = grinder->pipe;
+ struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
+ uint32_t tc_index = grinder->tc_index;
+ uint32_t qmask = grinder->qmask;
+ uint32_t qindex;
+
+ qindex = tc_index * 4;
+
+ grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+
+ grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
+ grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
+ grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
+ grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
+
+ grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
+ grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
+ grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
+ grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+}
+
+static inline void
+grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ struct rte_sched_pipe *pipe = grinder->pipe;
+ uint32_t tc_index = grinder->tc_index;
+ uint32_t qindex;
+
+ qindex = tc_index * 4;
+
+ pipe->wrr_tokens[qindex] = (uint8_t) ((grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >> RTE_SCHED_WRR_SHIFT);
+ pipe->wrr_tokens[qindex + 1] = (uint8_t) ((grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >> RTE_SCHED_WRR_SHIFT);
+ pipe->wrr_tokens[qindex + 2] = (uint8_t) ((grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >> RTE_SCHED_WRR_SHIFT);
+ pipe->wrr_tokens[qindex + 3] = (uint8_t) ((grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >> RTE_SCHED_WRR_SHIFT);
+}
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint16_t wrr_tokens_min;
+
+ grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
+ grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
+ grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
+ grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+
+ grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
+ wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
+
+ grinder->wrr_tokens[0] -= wrr_tokens_min;
+ grinder->wrr_tokens[1] -= wrr_tokens_min;
+ grinder->wrr_tokens[2] -= wrr_tokens_min;
+ grinder->wrr_tokens[3] -= wrr_tokens_min;
+}
+
+#else
+
+#error Invalid value for RTE_SCHED_WRR
+
+#endif /* RTE_SCHED_WRR */
+
+#define grinder_evict(port, pos)
+
+static inline void
+grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+
+ rte_prefetch0(grinder->pipe);
+ rte_prefetch0(grinder->queue[0]);
+}
+
+static inline void
+grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint16_t qsize, qr[4];
+
+ qsize = grinder->qsize;
+ qr[0] = grinder->queue[0]->qr & (qsize - 1);
+ qr[1] = grinder->queue[1]->qr & (qsize - 1);
+ qr[2] = grinder->queue[2]->qr & (qsize - 1);
+ qr[3] = grinder->queue[3]->qr & (qsize - 1);
+
+ rte_prefetch0(grinder->qbase[0] + qr[0]);
+ rte_prefetch0(grinder->qbase[1] + qr[1]);
+
+ grinder_wrr_load(port, pos);
+ grinder_wrr(port, pos);
+
+ rte_prefetch0(grinder->qbase[2] + qr[2]);
+ rte_prefetch0(grinder->qbase[3] + qr[3]);
+}
+
+static inline void
+grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+ uint32_t qpos = grinder->qpos;
+ struct rte_mbuf **qbase = grinder->qbase[qpos];
+ uint16_t qsize = grinder->qsize;
+ uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
+
+ grinder->pkt = qbase[qr];
+ rte_prefetch0(grinder->pkt);
+
+ if (unlikely((qr & 0x7) == 7)) {
+ uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
+
+ rte_prefetch0(qbase + qr_next);
+ }
+}
+
+static inline uint32_t
+grinder_handle(struct rte_sched_port *port, uint32_t pos)
+{
+ struct rte_sched_grinder *grinder = port->grinder + pos;
+
+ switch (grinder->state) {
+ case e_GRINDER_PREFETCH_PIPE:
+ {
+ if (grinder_next_pipe(port, pos)) {
+ grinder_prefetch_pipe(port, pos);
+ port->busy_grinders ++;
+
+ grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+ return 0;
+ }
+
+ return 0;
+ }
+
+ case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
+ {
+ struct rte_sched_pipe *pipe = grinder->pipe;
+
+ grinder->pipe_params = port->pipe_profiles + pipe->profile;
+ grinder_prefetch_tc_queue_arrays(port, pos);
+ grinder_credits_update(port, pos);
+
+ grinder->state = e_GRINDER_PREFETCH_MBUF;
+ return 0;
+ }
+
+ case e_GRINDER_PREFETCH_MBUF:
+ {
+ grinder_prefetch_mbuf(port, pos);
+
+ grinder->state = e_GRINDER_READ_MBUF;
+ return 0;
+ }
+
+ case e_GRINDER_READ_MBUF:
+ {
+ uint32_t result = 0;
+
+ result = grinder_schedule(port, pos);
+
+ /* Look for next packet within the same TC */
+ if (result && grinder->qmask) {
+ grinder_wrr(port, pos);
+ grinder_prefetch_mbuf(port, pos);
+
+ return 1;
+ }
+ grinder_wrr_store(port, pos);
+
+ /* Look for another active TC within same pipe */
+ if (grinder_next_tc(port, pos)) {
+ grinder_prefetch_tc_queue_arrays(port, pos);
+
+ grinder->state = e_GRINDER_PREFETCH_MBUF;
+ return result;
+ }
+ if ((grinder->productive == 0) && (port->pipe_loop == RTE_SCHED_PIPE_INVALID)) {
+ port->pipe_loop = grinder->pindex;
+ }
+ grinder_evict(port, pos);
+
+ /* Look for another active pipe */
+ if (grinder_next_pipe(port, pos)) {
+ grinder_prefetch_pipe(port, pos);
+
+ grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+ return result;
+ }
+
+ /* No active pipe found */
+ port->busy_grinders --;
+
+ grinder->state = e_GRINDER_PREFETCH_PIPE;
+ return result;
+ }
+
+ default:
+ rte_panic("Algorithmic error (invalid state)\n");
+ return 0;
+ }
+}
+
+static inline void
+rte_sched_port_time_resync(struct rte_sched_port *port)
+{
+ uint64_t cycles = rte_get_tsc_cycles();
+ uint64_t cycles_diff = cycles - port->time_cpu_cycles;
+ double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte;
+
+ /* Advance port time */
+ port->time_cpu_cycles = cycles;
+ port->time_cpu_bytes += (uint64_t) bytes_diff;
+ if (port->time < port->time_cpu_bytes) {
+ port->time = port->time_cpu_bytes;
+ }
+
+ /* Reset pipe loop detection */
+ port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+}
+
+static inline int
+rte_sched_port_exceptions(struct rte_sched_port *port)
+{
+ int exceptions;
+
+ /* Check if any exception flag is set */
+ exceptions = (port->busy_grinders == 0) ||
+ (port->pipe_exhaustion == 1);
+
+ /* Clear exception flags */
+ port->pipe_exhaustion = 0;
+
+ return exceptions;
+}
+
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+ uint32_t i, count;
+
+ port->pkts_out = pkts;
+ port->n_pkts_out = 0;
+
+ rte_sched_port_time_resync(port);
+
+ /* Take each queue in the grinder one step further */
+ for (i = 0, count = 0; ; i ++) {
+ count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1));
+ if ((count == n_pkts) || rte_sched_port_exceptions(port)) {
+ break;
+ }
+ }
+
+ return count;
+}
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
new file mode 100644
index 0000000..7b49248
--- /dev/null
+++ b/lib/librte_sched/rte_sched.h
@@ -0,0 +1,446 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_H__
+#define __INCLUDE_RTE_SCHED_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Hierarchical Scheduler
+ *
+ * The hierarchical scheduler prioritizes the transmission of packets from different
+ * users and traffic classes according to the Service Level Agreements (SLAs) defined
+ * for the current network node.
+ *
+ * The scheduler supports thousands of packet queues grouped under a 5-level hierarchy:
+ * 1. Port:
+ * - Typical usage: output Ethernet port;
+ * - Multiple ports are scheduled in round robin order with equal priority;
+ * 2. Subport:
+ * - Typical usage: group of users;
+ * - Traffic shaping using the token bucket algorithm (one bucket per subport);
+ * - Upper limit enforced per traffic class at subport level;
+ * - Lower priority traffic classes able to reuse subport bandwidth currently
+ * unused by higher priority traffic classes of the same subport;
+ * - When any subport traffic class is oversubscribed (configuration time
+ * event), the usage of subport member pipes with high demand for that
+ * traffic class pipes is truncated to a dynamically adjusted value with no
+ * impact to low demand pipes;
+ * 3. Pipe:
+ * - Typical usage: individual user/subscriber;
+ * - Traffic shaping using the token bucket algorithm (one bucket per pipe);
+ * 4. Traffic class:
+ * - Traffic classes of the same pipe handled in strict priority order;
+ * - Upper limit enforced per traffic class at the pipe level;
+ * - Lower priority traffic classes able to reuse pipe bandwidth currently
+ * unused by higher priority traffic classes of the same pipe;
+ * 5. Queue:
+ * - Typical usage: queue hosting packets from one or multiple connections
+ * of same traffic class belonging to the same user;
+ * - Weighted Round Robin (WRR) is used to service the queues within same
+ * pipe traffic class.
+ *
+ ***/
+
+#include <sys/types.h>
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/** Random Early Detection (RED) */
+#ifdef RTE_SCHED_RED
+#include "rte_red.h"
+#endif
+
+/** Number of traffic classes per pipe (as well as subport). Cannot be changed. */
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE 4
+
+/** Number of queues per pipe traffic class. Cannot be changed. */
+#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS 4
+
+/** Number of queues per pipe. */
+#define RTE_SCHED_QUEUES_PER_PIPE \
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * \
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+
+/** Maximum number of pipe profiles that can be defined per port. Compile-time configurable.*/
+#ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT
+#define RTE_SCHED_PIPE_PROFILES_PER_PORT 256
+#endif
+
+/** Ethernet framing overhead. Overhead fields per Ethernet frame:
+ 1. Preamble: 7 bytes;
+ 2. Start of Frame Delimiter (SFD): 1 byte;
+ 3. Frame Check Sequence (FCS): 4 bytes;
+ 4. Inter Frame Gap (IFG): 12 bytes.
+The FCS is considered overhead only if not included in the packet length (field pkt.pkt_len
+of struct rte_mbuf). */
+#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
+#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT 24
+#endif
+
+/** Subport configuration parameters. The period and credits_per_period parameters are measured
+in bytes, with one byte meaning the time duration associated with the transmission of one byte
+on the physical medium of the output port, with pipe or pipe traffic class rate (measured as
+percentage of output port rate) determined as credits_per_period divided by period. One credit
+represents one byte. */
+struct rte_sched_subport_params {
+ /* Subport token bucket */
+ uint32_t tb_rate; /**< Subport token bucket rate (measured in bytes per second) */
+ uint32_t tb_size; /**< Subport token bucket size (measured in credits) */
+
+ /* Subport traffic classes */
+ uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Subport traffic class rates (measured in bytes per second) */
+ uint32_t tc_period; /**< Enforcement period for traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ uint32_t tc_ov_period; /**< Enforcement period for traffic class oversubscription (measured in milliseconds) */
+#endif
+};
+
+/** Subport statistics */
+struct rte_sched_subport_stats {
+ /* Packets */
+ uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets successfully written to current
+ subport for each traffic class */
+ uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets dropped by the current
+ subport for each traffic class due to subport queues being full or congested*/
+
+ /* Bytes */
+ uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes successfully written to current
+ subport for each traffic class*/
+ uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes dropped by the current
+ subport for each traffic class due to subport queues being full or congested */
+};
+
+/** Pipe configuration parameters. The period and credits_per_period parameters are measured
+in bytes, with one byte meaning the time duration associated with the transmission of one byte
+on the physical medium of the output port, with pipe or pipe traffic class rate (measured as
+percentage of output port rate) determined as credits_per_period divided by period. One credit
+represents one byte. */
+struct rte_sched_pipe_params {
+ /* Pipe token bucket */
+ uint32_t tb_rate; /**< Pipe token bucket rate (measured in bytes per second) */
+ uint32_t tb_size; /**< Pipe token bucket size (measured in credits) */
+
+ /* Pipe traffic classes */
+ uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Pipe traffic class rates (measured in bytes per second) */
+ uint32_t tc_period; /**< Enforcement period for pipe traffic class rates (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Traffic class weights to be used for the
+ current pipe in the event of subport traffic class oversubscription */
+#endif
+
+ /* Pipe queues */
+ uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights for the queues of the current pipe */
+};
+
+/** Queue statistics */
+struct rte_sched_queue_stats {
+ /* Packets */
+ uint32_t n_pkts; /**< Number of packets successfully written to current queue */
+ uint32_t n_pkts_dropped; /**< Number of packets dropped due to current queue being full or congested */
+
+ /* Bytes */
+ uint32_t n_bytes; /**< Number of bytes successfully written to current queue */
+ uint32_t n_bytes_dropped; /**< Number of bytes dropped due to current queue being full or congested */
+};
+
+/** Port configuration parameters. */
+struct rte_sched_port_params {
+ const char *name; /**< Literal string to be associated to the current port scheduler instance */
+ int socket; /**< CPU socket ID where the memory for port scheduler should be allocated */
+ uint32_t rate; /**< Output port rate (measured in bytes per second) */
+ uint32_t frame_overhead; /**< Framing overhead per packet (measured in bytes) */
+ uint32_t n_subports_per_port; /**< Number of subports for the current port scheduler instance*/
+ uint32_t n_pipes_per_subport; /**< Number of pipes for each port scheduler subport */
+ uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Packet queue size for each traffic class. All queues
+ within the same pipe traffic class have the same size. Queues from
+ different pipes serving the same traffic class have the same size. */
+ struct rte_sched_pipe_params *pipe_profiles; /**< Pipe profile table defined for current port scheduler instance.
+ Every pipe of the current port scheduler is configured using one of the
+ profiles from this table. */
+ uint32_t n_pipe_profiles; /**< Number of profiles in the pipe profile table */
+#ifdef RTE_SCHED_RED
+ struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters */
+#endif
+};
+
+/** Path through the scheduler hierarchy used by the scheduler enqueue operation to
+identify the destination queue for the current packet. Stored in the field pkt.hash.sched
+of struct rte_mbuf of each packet, typically written by the classification stage and read by
+scheduler enqueue.*/
+struct rte_sched_port_hierarchy {
+ uint32_t queue:2; /**< Queue ID (0 .. 3) */
+ uint32_t traffic_class:2; /**< Traffic class ID (0 .. 3)*/
+ uint32_t pipe:20; /**< Pipe ID */
+ uint32_t subport:6; /**< Subport ID */
+ uint32_t color:2; /**< Color */
+};
+
+/*
+ * Configuration
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler port configuration
+ *
+ * @param params
+ * Port scheduler configuration parameter structure
+ * @return
+ * Handle to port scheduler instance upon success or NULL otherwise.
+ */
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params);
+
+/**
+ * Hierarchical scheduler port free
+ *
+ * @param port
+ * Handle to port scheduler instance
+ */
+void
+rte_sched_port_free(struct rte_sched_port *port);
+
+/**
+ * Hierarchical scheduler subport configuration
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param subport_id
+ * Subport ID
+ * @param params
+ * Subport configuration parameters
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_config(struct rte_sched_port *port,
+ uint32_t subport_id,
+ struct rte_sched_subport_params *params);
+
+/**
+ * Hierarchical scheduler pipe configuration
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param subport_id
+ * Subport ID
+ * @param pipe_id
+ * Pipe ID within subport
+ * @param pipe_profile
+ * ID of port-level pre-configured pipe profile
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+ uint32_t subport_id,
+ uint32_t pipe_id,
+ int32_t pipe_profile);
+
+/**
+ * Hierarchical scheduler memory footprint size per port
+ *
+ * @param params
+ * Port scheduler configuration parameter structure
+ * @return
+ * Memory footprint size in bytes upon success, 0 otherwise
+ */
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
+
+/*
+ * Statistics
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler subport statistics read
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param subport_id
+ * Subport ID
+ * @param stats
+ * Pointer to pre-allocated subport statistics structure where the statistics
+ * counters should be stored
+ * @param tc_ov
+ * Pointer to pre-allocated 4-entry array where the oversubscription status for
+ * each of the 4 subport traffic classes should be stored.
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+ uint32_t subport_id,
+ struct rte_sched_subport_stats *stats,
+ uint32_t *tc_ov);
+
+/**
+ * Hierarchical scheduler queue statistics read
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param queue_id
+ * Queue ID within port scheduler
+ * @param stats
+ * Pointer to pre-allocated subport statistics structure where the statistics
+ * counters should be stored
+ * @param qlen
+ * Pointer to pre-allocated variable where the current queue length should be stored.
+ * @return
+ * 0 upon success, error code otherwise
+ */
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+ uint32_t queue_id,
+ struct rte_sched_queue_stats *stats,
+ uint16_t *qlen);
+
+/*
+ * Run-time
+ *
+ ***/
+
+/**
+ * Scheduler hierarchy path write to packet descriptor. Typically called by the
+ * packet classification stage.
+ *
+ * @param pkt
+ * Packet descriptor handle
+ * @param subport
+ * Subport ID
+ * @param pipe
+ * Pipe ID within subport
+ * @param traffic_class
+ * Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ * Queue ID within pipe traffic class (0 .. 3)
+ */
+static inline void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt,
+ uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue, enum rte_meter_color color)
+{
+ struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+ sched->color = (uint32_t) color;
+ sched->subport = subport;
+ sched->pipe = pipe;
+ sched->traffic_class = traffic_class;
+ sched->queue = queue;
+}
+
+/**
+ * Scheduler hierarchy path read from packet descriptor (struct rte_mbuf). Typically
+ * called as part of the hierarchical scheduler enqueue operation. The subport,
+ * pipe, traffic class and queue parameters need to be pre-allocated by the caller.
+ *
+ * @param pkt
+ * Packet descriptor handle
+ * @param subport
+ * Subport ID
+ * @param pipe
+ * Pipe ID within subport
+ * @param traffic_class
+ * Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ * Queue ID within pipe traffic class (0 .. 3)
+ *
+ */
+static inline void
+rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
+{
+ struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+ *subport = sched->subport;
+ *pipe = sched->pipe;
+ *traffic_class = sched->traffic_class;
+ *queue = sched->queue;
+}
+
+static inline enum rte_meter_color
+rte_sched_port_pkt_read_color(struct rte_mbuf *pkt)
+{
+ struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
+
+ return (enum rte_meter_color) sched->color;
+}
+
+/**
+ * Hierarchical scheduler port enqueue. Writes up to n_pkts to port scheduler and
+ * returns the number of packets actually written. For each packet, the port scheduler
+ * queue to write the packet to is identified by reading the hierarchy path from the
+ * packet descriptor; if the queue is full or congested and the packet is not written
+ * to the queue, then the packet is automatically dropped without any action required
+ * from the caller.
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param pkts
+ * Array storing the packet descriptor handles
+ * @param n_pkts
+ * Number of packets to enqueue from the pkts array into the port scheduler
+ * @return
+ * Number of packets successfully enqueued
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+/**
+ * Hierarchical scheduler port dequeue. Reads up to n_pkts from the port scheduler
+ * and stores them in the pkts array and returns the number of packets actually read.
+ * The pkts array needs to be pre-allocated by the caller with at least n_pkts entries.
+ *
+ * @param port
+ * Handle to port scheduler instance
+ * @param pkts
+ * Pre-allocated packet descriptor array where the packets dequeued from the port
+ * scheduler should be stored
+ * @param n_pkts
+ * Number of packets to dequeue from the port scheduler
+ * @return
+ * Number of packets successfully dequeued and placed in the pkts array
+ */
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_H__ */
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
new file mode 100644
index 0000000..dc76ad8
--- /dev/null
+++ b/lib/librte_sched/rte_sched_common.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_COMMON_H__
+#define __INCLUDE_RTE_SCHED_COMMON_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#define __rte_aligned_16 __attribute__((__aligned__(16)))
+
+static inline uint32_t
+rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
+{
+ return (x < y)? x : y;
+}
+
+#if 0
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+ uint32_t pos0, pos1;
+
+ pos0 = (x[0] <= x[1])? 0 : 1;
+ pos1 = (x[2] <= x[3])? 2 : 3;
+
+ return (x[pos0] <= x[pos1])? pos0 : pos1;
+}
+
+#else
+
+/* simplified version to remove branches with CMOV instruction */
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+ uint32_t pos0 = 0;
+ uint32_t pos1 = 2;
+
+ if (x[1] <= x[0]) pos0 = 1;
+ if (x[3] <= x[2]) pos1 = 3;
+ if (x[pos1] <= x[pos0]) pos0 = pos1;
+
+ return pos0;
+}
+
+#endif
+
+/*
+ * Compute the Greatest Common Divisor (GCD) of two numbers.
+ * This implementation uses Euclid's algorithm:
+ * gcd(a, 0) = a
+ * gcd(a, b) = gcd(b, a mod b)
+ *
+ */
+static inline uint32_t
+rte_get_gcd(uint32_t a, uint32_t b)
+{
+ uint32_t c;
+
+ if (a == 0)
+ return b;
+ if (b == 0)
+ return a;
+
+ if (a < b) {
+ c = a;
+ a = b;
+ b = c;
+ }
+
+ while (b != 0) {
+ c = a % b;
+ a = b;
+ b = c;
+ }
+
+ return a;
+}
+
+/*
+ * Compute the Lowest Common Denominator (LCD) of two numbers.
+ * This implementation computes GCD first:
+ * LCD(a, b) = (a * b) / GCD(a, b)
+ *
+ */
+static inline uint32_t
+rte_get_lcd(uint32_t a, uint32_t b)
+{
+ return (a * b) / rte_get_gcd(a, b);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 8eb45d8..4b80255 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -105,6 +105,12 @@ ifeq ($(CONFIG_RTE_LIBRTE_METER),y)
LDLIBS += -lrte_meter
endif
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+LDLIBS += -lrte_sched
+LDLIBS += -lm
+LDLIBS += -lrt
+endif
+
LDLIBS += --start-group
ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y)