Diffstat (limited to 'lib/librte_eal/linuxapp')
-rw-r--r--  lib/librte_eal/linuxapp/Makefile                                 39
-rw-r--r--  lib/librte_eal/linuxapp/eal/Makefile                             91
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal.c                               620
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_alarm.c                         232
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_debug.c                         114
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_hpet.c                          232
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_hugepage_info.c                 229
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_interrupts.c                    540
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_lcore.c                         192
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_log.c                           137
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memory.c                        796
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_pci.c                           770
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_thread.c                        237
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/eal_fs_paths.h               96
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/eal_hugepages.h              62
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h           76
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/eal_thread.h                 55
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h    56
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/exec-env/rte_lcore.h         92
-rw-r--r--  lib/librte_eal/linuxapp/eal/include/exec-env/rte_per_lcore.h     69
-rw-r--r--  lib/librte_eal/linuxapp/igb_uio/Makefile                         55
-rw-r--r--  lib/librte_eal/linuxapp/igb_uio/igb_uio.c                       402
22 files changed, 5192 insertions, 0 deletions
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
new file mode 100644
index 0000000..17b4222
--- /dev/null
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -0,0 +1,39 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# version: DPDK.L.1.2.3-3
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += igb_uio
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
new file mode 100644
index 0000000..c600cbf
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -0,0 +1,91 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# version: DPDK.L.1.2.3-3
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_ring
+CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
+CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
+CFLAGS += -I$(RTE_SDK)/lib/librte_ether
+CFLAGS += $(WERROR_FLAGS) -O3
+
+# specific to linuxapp exec-env
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_memory.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_hpet.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
+
+# from common dir
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_log.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_cpuflags.c
+
+CFLAGS_eal.o := -D_GNU_SOURCE
+CFLAGS_eal_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_log.o := -D_GNU_SOURCE
+CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+CFLAGS_eal_hpet.o += -Wno-return-type
+endif
+
+INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP)-include/exec-env := \
+ $(addprefix include/exec-env/,$(INC))
+
+DEPDIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += lib/librte_eal/common
+
+include $(RTE_SDK)/mk/rte.lib.mk
+
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
new file mode 100644
index 0000000..8d82cc3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -0,0 +1,620 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_common.h>
+#include <rte_version.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_fs_paths.h"
+#include "eal_hugepages.h"
+
+#define OPT_HUGE_DIR "huge-dir"
+#define OPT_PROC_TYPE "proc-type"
+#define OPT_NO_SHCONF "no-shconf"
+#define OPT_NO_HPET "no-hpet"
+#define OPT_NO_PCI "no-pci"
+#define OPT_NO_HUGE "no-huge"
+#define OPT_FILE_PREFIX "file-prefix"
+
+#define RTE_EAL_BLACKLIST_SIZE 0x100
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+#define GET_BLACKLIST_FIELD(in, fd, lim, dlm) \
+{ \
+ unsigned long val; \
+ char *end; \
+ errno = 0; \
+ val = strtoul((in), &end, 16); \
+ if (errno != 0 || end[0] != (dlm) || val > (lim)) \
+ return (-EINVAL); \
+ (fd) = (typeof (fd))val; \
+ (in) = end + 1; \
+}
+
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = offsetof(struct rte_mem_config, memseg),
+ .l_len = sizeof(early_mem_config.memseg),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+ .mem_config = &early_mem_config,
+};
+
+static struct rte_pci_addr eal_dev_blacklist[RTE_EAL_BLACKLIST_SIZE];
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+ return &rte_config;
+}
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static void
+rte_eal_config_create(void)
+{
+ void *rte_mem_cfg_addr;
+ int retval;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
+ if (mem_cfg_fd < 0)
+ rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+ }
+
+ retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+ if (retval < 0){
+ close(mem_cfg_fd);
+ rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
+ }
+
+ retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
+ "process running?\n", pathname);
+ }
+
+ rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+ PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+
+ if (rte_mem_cfg_addr == MAP_FAILED){
+ rte_panic("Cannot mmap memory for rte_config\n");
+ }
+ rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+ memcpy(rte_config.mem_config, &early_mem_config,
+ sizeof(early_mem_config));
+}
+
+/* attach to an existing shared memory config */
+static void
+rte_eal_config_attach(void)
+{
+ void *rte_mem_cfg_addr;
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDONLY);
+ if (mem_cfg_fd < 0)
+ rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+ }
+
+ rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), PROT_READ,
+ MAP_SHARED, mem_cfg_fd, 0);
+ close(mem_cfg_fd);
+ if (rte_mem_cfg_addr == MAP_FAILED)
+ rte_panic("Cannot mmap memory for rte_config\n");
+
+ rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+}
+
+/* Detect if we are a primary or a secondary process */
+static enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+ enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+ const char *pathname = eal_runtime_config_path();
+
+ /* if we can open the file but not get a write-lock we are a secondary
+ * process. NOTE: if we get a file handle back, we keep that open
+ * and don't close it to prevent a race condition between multiple opens */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+
+ RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+ ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+ return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static void
+rte_config_init(void)
+{
+ /* set the magic in configuration structure */
+ rte_config.magic = RTE_MAGIC;
+ rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ?
+ eal_proc_type_detect() : /* for auto, detect the type */
+ internal_config.process_type; /* otherwise use what's already set */
+
+ switch (rte_config.process_type){
+ case RTE_PROC_PRIMARY:
+ rte_eal_config_create();
+ break;
+ case RTE_PROC_SECONDARY:
+ rte_eal_config_attach();
+ break;
+ case RTE_PROC_AUTO:
+ case RTE_PROC_INVALID:
+ rte_panic("Invalid process type\n");
+ }
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+ printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b <domain:bus:devid.func>]"
+ "[--proc-type primary|secondary|auto] \n\n"
+ "EAL options:\n"
+ " -c COREMASK: A hexadecimal bitmask of cores to run on\n"
+ " -n NUM : Number of memory channels\n"
+ " -v : Display version information on startup\n"
+ " -b <domain:bus:devid.func>: to prevent EAL from using specified PCI device\n"
+ " (multiple -b options are alowed)\n"
+ " -m MB : memory to allocate (default = size of hugemem)\n"
+ " -r NUM : force number of memory ranks (don't detect)\n"
+ " --"OPT_HUGE_DIR" : directory where hugetlbfs is mounted\n"
+ " --"OPT_PROC_TYPE": type of this process\n"
+ " --"OPT_FILE_PREFIX": prefix for hugepage filenames\n"
+ "\nEAL options for DEBUG use only:\n"
+ " --"OPT_NO_HUGE" : use malloc instead of hugetlbfs\n"
+ " --"OPT_NO_PCI" : disable pci\n"
+ " --"OPT_NO_HPET" : disable hpet\n"
+ " --"OPT_NO_SHCONF": no shared config (mmap'd files)\n\n",
+ prgname);
+}
+
+/*
+ * Parse the coremask given as argument (hexadecimal string) and fill
+ * the global configuration (core role and core count) with the parsed
+ * value.
+ */
+static int
+eal_parse_coremask(const char *coremask)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ unsigned i;
+ char *end = NULL;
+ unsigned long long cm;
+ unsigned count = 0;
+
+ /* parse hexadecimal string */
+ cm = strtoull(coremask, &end, 16);
+ if ((coremask[0] == '\0') || (end == NULL) || (*end != '\0') || (cm == 0))
+ return -1;
+
+ RTE_LOG(DEBUG, EAL, "coremask set to %llx\n", cm);
+ /* set core role and core count */
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if ((1ULL << i) & cm) {
+ if (count == 0)
+ cfg->master_lcore = i;
+ cfg->lcore_role[i] = ROLE_RTE;
+ count++;
+ }
+ else {
+ cfg->lcore_role[i] = ROLE_OFF;
+ }
+ }
+ return 0;
+}
+
+static inline uint64_t
+eal_get_hugepage_mem_size(void)
+{
+ uint64_t size = 0;
+ unsigned i;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++){
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ if (hpi->hugedir != NULL)
+ size += hpi->hugepage_sz * hpi->num_pages;
+ }
+
+ return (size);
+}
+
+static enum rte_proc_type_t
+eal_parse_proc_type(const char *arg)
+{
+ if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
+ return RTE_PROC_PRIMARY;
+ if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
+ return RTE_PROC_SECONDARY;
+ if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
+ return RTE_PROC_AUTO;
+
+ return RTE_PROC_INVALID;
+}
+
+static int
+eal_parse_blacklist(const char *input, struct rte_pci_addr *dev2bl)
+{
+ GET_BLACKLIST_FIELD(input, dev2bl->domain, UINT16_MAX, ':');
+ GET_BLACKLIST_FIELD(input, dev2bl->bus, UINT8_MAX, ':');
+ GET_BLACKLIST_FIELD(input, dev2bl->devid, UINT8_MAX, '.');
+ GET_BLACKLIST_FIELD(input, dev2bl->function, UINT8_MAX, 0);
+ return (0);
+}
+
+static ssize_t
+eal_parse_blacklist_opt(const char *optarg, size_t idx)
+{
+ if (idx >= sizeof (eal_dev_blacklist) / sizeof (eal_dev_blacklist[0])) {
+ RTE_LOG(ERR, EAL,
+ "%s - too many devices to blacklist...\n",
+ optarg);
+ return (-EINVAL);
+ } else if (eal_parse_blacklist(optarg, eal_dev_blacklist + idx) != 0) {
+ RTE_LOG(ERR, EAL,
+ "%s - invalid device to blacklist...\n",
+ optarg);
+ return (-EINVAL);
+ }
+
+ idx += 1;
+ return (idx);
+}
+
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ int coremask_ok = 0;
+ ssize_t blacklist_index = 0;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {OPT_NO_HUGE, 0, 0, 0},
+ {OPT_NO_PCI, 0, 0, 0},
+ {OPT_NO_HPET, 0, 0, 0},
+ {OPT_HUGE_DIR, 1, 0, 0},
+ {OPT_NO_SHCONF, 0, 0, 0},
+ {OPT_PROC_TYPE, 1, 0, 0},
+ {OPT_FILE_PREFIX, 1, 0, 0},
+ {0, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ internal_config.memory = 0;
+ internal_config.force_nrank = 0;
+ internal_config.force_nchannel = 0;
+ internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
+ internal_config.hugepage_dir = NULL;
+#ifdef RTE_LIBEAL_USE_HPET
+ internal_config.no_hpet = 0;
+#else
+ internal_config.no_hpet = 1;
+#endif
+
+ while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* blacklist */
+ case 'b':
+ if ((blacklist_index = eal_parse_blacklist_opt(optarg,
+ blacklist_index)) < 0) {
+ eal_usage(prgname);
+ return (-1);
+ }
+ break;
+ /* coremask */
+ case 'c':
+ if (eal_parse_coremask(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid coremask\n");
+ eal_usage(prgname);
+ return -1;
+ }
+ coremask_ok = 1;
+ break;
+ /* size of memory */
+ case 'm':
+ internal_config.memory = atoi(optarg);
+ internal_config.memory *= 1024ULL;
+ internal_config.memory *= 1024ULL;
+ break;
+ /* force number of channels */
+ case 'n':
+ internal_config.force_nchannel = atoi(optarg);
+ if (internal_config.force_nchannel == 0 ||
+ internal_config.force_nchannel > 4) {
+ RTE_LOG(ERR, EAL, "invalid channel number\n");
+ eal_usage(prgname);
+ return -1;
+ }
+ break;
+ /* force number of ranks */
+ case 'r':
+ internal_config.force_nrank = atoi(optarg);
+ if (internal_config.force_nrank == 0 ||
+ internal_config.force_nrank > 16) {
+ RTE_LOG(ERR, EAL, "invalid rank number\n");
+ eal_usage(prgname);
+ return -1;
+ }
+ break;
+ case 'v':
+ /* since message is explicitly requested by user, we
+ * write message at highest log level so it can always be seen
+ * even if info or warning messages are disabled */
+ RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
+ break;
+
+ /* long options */
+ case 0:
+ if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
+ internal_config.no_hugetlbfs = 1;
+ }
+ else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
+ internal_config.no_pci = 1;
+ }
+ else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) {
+ internal_config.no_hpet = 1;
+ }
+ else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) {
+ internal_config.no_shconf = 1;
+ }
+ else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
+ internal_config.hugepage_dir = optarg;
+ }
+ else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) {
+ internal_config.process_type = eal_parse_proc_type(optarg);
+ }
+ else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
+ internal_config.hugefile_prefix = optarg;
+ }
+ break;
+
+ default:
+ eal_usage(prgname);
+ return -1;
+ }
+ }
+
+ /* sanity checks */
+ if (!coremask_ok) {
+ RTE_LOG(ERR, EAL, "coremask not specified\n");
+ eal_usage(prgname);
+ return -1;
+ }
+ if (internal_config.process_type == RTE_PROC_AUTO){
+ internal_config.process_type = eal_proc_type_detect();
+ }
+ if (internal_config.process_type == RTE_PROC_INVALID){
+ RTE_LOG(ERR, EAL, "Invalid process type specified\n");
+ eal_usage(prgname);
+ return -1;
+ }
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ internal_config.force_nchannel == 0) {
+ RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n");
+ eal_usage(prgname);
+ return -1;
+ }
+ if (index(internal_config.hugefile_prefix,'%') != NULL){
+ RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n");
+ eal_usage(prgname);
+ return -1;
+ }
+
+ if (blacklist_index > 0)
+ rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index);
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+ int i, fctret, ret;
+ pthread_t thread_id;
+
+ thread_id = pthread_self();
+
+ if (rte_eal_log_early_init() < 0)
+ rte_panic("Cannot init early logs\n");
+
+ fctret = eal_parse_args(argc, argv);
+ if (fctret < 0)
+ exit(1);
+
+ if (eal_hugepage_info_init() < 0)
+ rte_panic("Cannot get hugepage information\n");
+
+ if (internal_config.memory == 0) {
+ if (internal_config.no_hugetlbfs)
+ internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+ else
+ internal_config.memory = eal_get_hugepage_mem_size();
+ }
+
+ rte_srand(rte_rdtsc());
+ rte_config_init();
+
+ if (rte_eal_cpu_init() < 0)
+ rte_panic("Cannot detect lcores\n");
+
+ if (rte_eal_memory_init() < 0)
+ rte_panic("Cannot init memory\n");
+
+ if (rte_eal_memzone_init() < 0)
+ rte_panic("Cannot init memzone\n");
+
+ if (rte_eal_tailqs_init() < 0)
+ rte_panic("Cannot init tail queues for objects\n");
+
+ if (rte_eal_log_init() < 0)
+ rte_panic("Cannot init logs\n");
+
+ if (rte_eal_alarm_init() < 0)
+ rte_panic("Cannot init interrupt-handling thread\n");
+
+ if (rte_eal_intr_init() < 0)
+ rte_panic("Cannot init interrupt-handling thread\n");
+
+ if (rte_eal_hpet_init() < 0)
+ rte_panic("Cannot init HPET\n");
+
+ if (rte_eal_pci_init() < 0)
+ rte_panic("Cannot init PCI\n");
+
+ RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
+ rte_config.master_lcore, (int)thread_id);
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+
+ /*
+ * create communication pipes between master thread
+ * and children
+ */
+ if (pipe(lcore_config[i].pipe_master2slave) < 0)
+ rte_panic("Cannot create pipe\n");
+ if (pipe(lcore_config[i].pipe_slave2master) < 0)
+ rte_panic("Cannot create pipe\n");
+
+ lcore_config[i].state = WAIT;
+
+ /* create a thread for each lcore */
+ ret = pthread_create(&lcore_config[i].thread_id, NULL,
+ eal_thread_loop, NULL);
+ if (ret != 0)
+ rte_panic("Cannot create thread\n");
+ }
+
+ eal_thread_init_master(rte_config.master_lcore);
+
+ return fctret;
+}
+
+/* get core role */
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned lcore_id)
+{
+ return (rte_config.lcore_role[lcore_id]);
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+ return (rte_config.process_type);
+}
+
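The rte_eal_init() entry point added above is the first call an application makes; a minimal usage sketch follows. This is illustrative only and not part of the patch: it assumes the launch helpers declared by rte_launch.h and rte_lcore.h (rte_eal_mp_remote_launch(), rte_eal_mp_wait_lcore(), rte_lcore_id()), and lcore_main() is a hypothetical worker function.

/* Minimal sketch of an application using rte_eal_init() -- illustrative only. */
#include <stdio.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_debug.h>

/* hypothetical per-lcore worker */
static int
lcore_main(__attribute__((unused)) void *arg)
{
	printf("hello from lcore %u\n", rte_lcore_id());
	return 0;
}

int
main(int argc, char **argv)
{
	/* e.g. run as: ./app -c 0x3 -n 4 --proc-type=auto */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_panic("Cannot init EAL\n");

	/* rte_eal_init() returns the number of arguments it consumed */
	argc -= ret;
	argv += ret;

	/* run lcore_main() on every slave lcore, then on the master */
	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
	rte_eal_mp_wait_lcore();
	return 0;
}

Note that -c and -n are mandatory for a primary process, matching the checks in eal_parse_args() above.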
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
new file mode 100644
index 0000000..f2eabf6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -0,0 +1,232 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_per_lcore.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include <eal_private.h>
+
+#define NS_PER_US 1000
+#define US_PER_MS 1000
+#define MS_PER_S 1000
+#define US_PER_S (US_PER_MS * MS_PER_S)
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct timeval time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile int executing;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static int handler_registered = 0;
+static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg);
+
+int
+rte_eal_alarm_init(void)
+{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+ /* create a timerfd file descriptor */
+ intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+ if (intr_handle.fd == -1)
+ goto error;
+
+ return 0;
+
+error:
+ rte_errno = errno;
+ return -1;
+}
+
+static void
+eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
+ void *arg __rte_unused)
+{
+ struct timeval now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ while ((ap = LIST_FIRST(&alarm_list)) !=NULL &&
+ gettimeofday(&now, NULL) == 0 &&
+ (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
+ ap->time.tv_usec <= now.tv_usec))){
+ ap->executing = 1;
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+ LIST_REMOVE(ap, next);
+ rte_free(ap);
+ }
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ struct itimerspec atime = { .it_interval = { 0, 0 } };
+
+ ap = LIST_FIRST(&alarm_list);
+ atime.it_value.tv_sec = ap->time.tv_sec;
+ atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US;
+ /* perform borrow for subtraction if necessary */
+ if (now.tv_usec > ap->time.tv_usec)
+ atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US;
+
+ atime.it_value.tv_sec -= now.tv_sec;
+ atime.it_value.tv_nsec -= now.tv_usec * NS_PER_US;
+ timerfd_settime(intr_handle.fd, 0, &atime, NULL);
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+int
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct timeval now;
+ int ret = 0;
+ struct alarm_entry *ap, *new_alarm;
+
+ /* Check parameters, including that us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = rte_malloc(NULL, sizeof(*new_alarm), 0);
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ gettimeofday(&now, NULL);
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
+ new_alarm->executing = 0;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ if (!handler_registered) {
+ ret |= rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, NULL);
+ handler_registered = (ret == 0) ? 1 : 0;
+ }
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (ap->time.tv_sec > new_alarm->time.tv_sec ||
+ (ap->time.tv_sec == new_alarm->time.tv_sec &&
+ ap->time.tv_usec > new_alarm->time.tv_usec)){
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ if (LIST_FIRST(&alarm_list) == new_alarm) {
+ struct itimerspec alarm_time = {
+ .it_interval = {0, 0},
+ .it_value = {
+ .tv_sec = us / US_PER_S,
+ .tv_nsec = (us % US_PER_S) * NS_PER_US,
+ },
+ };
+ ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL);
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+
+ if (!cb_fn)
+ return -1;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
+ cb_fn == ap->cb_fn && ap->executing == 0 &&
+ (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+ LIST_REMOVE(ap, next);
+ rte_free(ap);
+ count++;
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn && ap->executing == 0 &&
+ (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+ LIST_REMOVE(ap,next);
+ rte_free(ap);
+ count++;
+ ap = ap_prev;
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ return count;
+}
+
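The alarm API defined above takes a timeout in microseconds and a callback of type rte_eal_alarm_callback, invoked as cb_fn(cb_arg); passing (void *)-1 as cb_arg to rte_eal_alarm_cancel() matches any argument, as handled in the cancel loop. A brief illustrative sketch, assuming the declarations live in rte_alarm.h as included above:

/* Illustrative only: arm a 100 ms alarm and cancel any pending ones. */
#include <stdio.h>
#include <rte_alarm.h>

static void
timeout_cb(void *arg)
{
	printf("alarm fired, arg=%p\n", arg);
	/* re-arming here would make the alarm periodic */
}

static void
arm_and_cancel_example(void)
{
	/* timeout is given in microseconds */
	if (rte_eal_alarm_set(100 * 1000, timeout_cb, NULL) != 0)
		printf("failed to arm alarm\n");

	/* (void *)-1 acts as a wildcard cb_arg in rte_eal_alarm_cancel() */
	rte_eal_alarm_cancel(timeout_cb, (void *)-1);
}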
diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c
new file mode 100644
index 0000000..c05341d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_debug.c
@@ -0,0 +1,114 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <execinfo.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+ void *func[BACKTRACE_SIZE];
+ char **symb = NULL;
+ int size;
+
+ size = backtrace(func, BACKTRACE_SIZE);
+ symb = backtrace_symbols(func, size);
+ while (size > 0) {
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+ "%d: [%s]\n", size, symb[size - 1]);
+ size --;
+ }
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+ return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+ va_list ap;
+
+ /* disable history */
+ rte_log_set_history(0);
+
+ rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+ va_list ap;
+
+ /* disable history */
+ rte_log_set_history(0);
+
+ if (exit_code != 0)
+ RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\nCause: ", exit_code);
+
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+ exit(exit_code);
+#else
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+#endif
+}
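rte_exit() and rte_panic() (the macro wrapper around __rte_panic()) are the two termination paths provided above: the former exits cleanly with a caller-supplied code, the latter logs, dumps the stack and calls abort(). An illustrative sketch, assuming both are declared by the EAL headers that eal.c already includes (rte_debug.h and rte_eal.h):

/* Illustrative only: rte_exit() for expected failures, rte_panic() for bugs. */
#include <stdlib.h>
#include <rte_eal.h>
#include <rte_debug.h>

static void
check_fd(int fd)
{
	if (fd < 0)
		/* clean termination: message plus a process exit code */
		rte_exit(EXIT_FAILURE, "cannot open resource (fd=%d)\n", fd);
}

static void
check_invariant(const void *ptr)
{
	if (ptr == NULL)
		/* programming error: log, dump the stack, then abort() */
		rte_panic("NULL pointer where none was expected\n");
}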
diff --git a/lib/librte_eal/linuxapp/eal/eal_hpet.c b/lib/librte_eal/linuxapp/eal/eal_hpet.c
new file mode 100644
index 0000000..aa686b1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_hpet.c
@@ -0,0 +1,232 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_tailq.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+#define DEV_HPET "/dev/hpet"
+
+/* Maximum number of counters. */
+#define HPET_TIMER_NUM 3
+
+/* General capabilities register */
+#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */
+#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */
+#define COUNT_SIZE_CAP_SHIFT 13 /* Count size capa. shift. */
+#define COUNT_SIZE_CAP_MASK 0x0000000000002000ULL /* Count size capa. mask. */
+
+/**
+ * HPET timer registers. From the Intel IA-PC HPET (High Precision Event
+ * Timers) Specification.
+ */
+struct eal_hpet_regs {
+ /* Memory-mapped, software visible registers */
+ uint64_t capabilities; /**< RO General Capabilities Register. */
+ uint64_t reserved0; /**< Reserved for future use. */
+ uint64_t config; /**< RW General Configuration Register. */
+ uint64_t reserved1; /**< Reserved for future use. */
+ uint64_t isr; /**< RW Clear General Interrupt Status. */
+ uint64_t reserved2[25]; /**< Reserved for future use. */
+ union {
+ uint64_t counter; /**< RW Main Counter Value Register. */
+ struct {
+ uint32_t counter_l; /**< RW Main Counter Low. */
+ uint32_t counter_h; /**< RW Main Counter High. */
+ };
+ };
+ uint64_t reserved3; /**< Reserved for future use. */
+ struct {
+ uint64_t config; /**< RW Timer Config and Capability Reg. */
+ uint64_t comp; /**< RW Timer Comparator Value Register. */
+ uint64_t fsb; /**< RW FSB Interrupt Route Register. */
+ uint64_t reserved4; /**< Reserved for future use. */
+ } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */
+};
+
+/* Mmap'd hpet registers */
+static volatile struct eal_hpet_regs *eal_hpet = NULL;
+
+/* Period at which the counter increments in femtoseconds (10^-15 seconds). */
+static uint32_t eal_hpet_resolution_fs = 0;
+
+/* Frequency of the counter in Hz */
+static uint64_t eal_hpet_resolution_hz = 0;
+
+/* Incremented 4 times during one full cycle of the 32-bit HPET counter */
+static uint32_t eal_hpet_msb;
+
+static pthread_t msb_inc_thread_id;
+
+/*
+ * This function runs on a dedicated thread to update a global variable
+ * holding the MSB of the HPET counter (unfortunately, we need this
+ * because the HPET is 32 bits by default under Linux).
+ */
+static __attribute__((noreturn)) void *
+hpet_msb_inc(__attribute__((unused)) void *arg)
+{
+ uint32_t t;
+
+ while (1) {
+ t = (eal_hpet->counter_l >> 30);
+ if (t != (eal_hpet_msb & 3))
+ eal_hpet_msb ++;
+ sleep(10);
+ }
+}
+
+static inline void
+set_rdtsc_freq(void)
+{
+ uint64_t start;
+
+ start = rte_rdtsc();
+ sleep(1);
+ eal_hpet_resolution_hz = rte_rdtsc() - start;
+ eal_hpet_resolution_fs = (uint32_t)
+ ((1.0 / eal_hpet_resolution_hz) / 1e-15);
+}
+
+/*
+ * Open and mmap /dev/hpet (high precision event timer) that will
+ * provide our time reference.
+ */
+int
+rte_eal_hpet_init(void)
+{
+ int fd, ret;
+
+ if (internal_config.no_hpet) {
+ goto use_rdtsc;
+ }
+
+ fd = open(DEV_HPET, O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(WARNING, EAL, "WARNING: Cannot open "DEV_HPET": %s! "
+ "The TSC will be used instead.\n",
+ strerror(errno));
+ goto use_rdtsc;
+ }
+ eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0);
+ if (eal_hpet == MAP_FAILED) {
+ RTE_LOG(WARNING, EAL, "WARNING: Cannot mmap "DEV_HPET"! "
+ "The TSC will be used instead.\n");
+ close(fd);
+ goto use_rdtsc;
+ }
+ close(fd);
+
+ eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities &
+ CLK_PERIOD_MASK) >>
+ CLK_PERIOD_SHIFT);
+
+ eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) /
+ (uint64_t)eal_hpet_resolution_fs;
+
+ eal_hpet_msb = (eal_hpet->counter_l >> 30);
+
+ /* create a thread that will increment a global variable for
+ * msb (hpet is 32 bits by default under linux) */
+ ret = pthread_create(&msb_inc_thread_id, NULL, hpet_msb_inc, NULL);
+ if (ret < 0) {
+ RTE_LOG(WARNING, EAL, "WARNING: Cannot create HPET timer thread! "
+ "The TSC will be used instead.\n");
+ goto use_rdtsc;
+ }
+
+ return 0;
+
+use_rdtsc:
+ internal_config.no_hpet = 1;
+ set_rdtsc_freq();
+ return 0;
+}
+
+uint64_t
+rte_get_hpet_hz(void)
+{
+ return eal_hpet_resolution_hz;
+}
+
+uint64_t
+rte_get_hpet_cycles(void)
+{
+ uint32_t t, msb;
+ uint64_t ret;
+
+ if(internal_config.no_hpet)
+ /* fallback to rdtsc */
+ return rte_rdtsc();
+
+ t = eal_hpet->counter_l;
+ msb = eal_hpet_msb;
+ ret = (msb + 2 - (t >> 30)) / 4;
+ ret <<= 32;
+ ret += t;
+ return ret;
+}
+
+void
+rte_delay_us(unsigned us)
+{
+ uint64_t start;
+ uint64_t ticks;
+ ticks = (uint64_t)us * 1000ULL * 1000ULL * 1000ULL;
+ ticks /= eal_hpet_resolution_fs;
+ start = rte_get_hpet_cycles();
+ while ((rte_get_hpet_cycles() - start) < ticks)
+ rte_pause();
+}
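The HPET/TSC counter exposed above can be used for coarse timing; rte_get_hpet_cycles() transparently falls back to rte_rdtsc() when --no-hpet is in effect or /dev/hpet is unavailable. An illustrative sketch, assuming the declarations come from rte_cycles.h as included by this file:

/* Illustrative only: measure elapsed time using the HPET/TSC cycle counter. */
#include <stdio.h>
#include <stdint.h>
#include <rte_cycles.h>

static void
time_some_work(void (*work)(void))
{
	uint64_t start, end, hz;

	hz = rte_get_hpet_hz();          /* counter frequency in Hz */
	start = rte_get_hpet_cycles();
	work();
	end = rte_get_hpet_cycles();

	printf("work took %.6f seconds\n", (double)(end - start) / (double)hz);

	rte_delay_us(10);                /* busy-wait for 10 microseconds */
}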
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
new file mode 100644
index 0000000..d1ed49a
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -0,0 +1,229 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include "rte_memory.h"
+#include "rte_memzone.h"
+#include "rte_tailq.h"
+#include "rte_eal.h"
+#include "rte_launch.h"
+#include "rte_per_lcore.h"
+#include "rte_lcore.h"
+#include "rte_debug.h"
+#include "rte_log.h"
+#include "rte_common.h"
+#include "rte_string_fns.h"
+#include "eal_internal_cfg.h"
+#include "eal_hugepages.h"
+
+static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
+
+static int32_t
+get_num_hugepages(const char *subdir)
+{
+ const char nr_hp_file[] = "nr_hugepages";
+ char path[BUFSIZ];
+ unsigned num_pages = 0;
+
+ rte_snprintf(path, sizeof(path), "%s/%s/%s",
+ sys_dir_path, subdir, nr_hp_file);
+ FILE *fd = fopen(path, "r");
+ if (fd == NULL || fscanf(fd, "%u", &num_pages) != 1)
+ rte_panic("Error reading file '%s'\n", path);
+ fclose(fd);
+
+ return num_pages;
+}
+
+static uint64_t
+get_default_hp_size(void)
+{
+ const char proc_meminfo[] = "/proc/meminfo";
+ const char str_hugepagesz[] = "Hugepagesize:";
+ unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1;
+ char buffer[256];
+ unsigned long long size = 0;
+
+ FILE *fd = fopen(proc_meminfo, "r");
+ if (fd == NULL)
+ rte_panic("Cannot open %s\n", proc_meminfo);
+ while(fgets(buffer, sizeof(buffer), fd)){
+ if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){
+ size = rte_str_to_size(&buffer[hugepagesz_len]);
+ break;
+ }
+ }
+ fclose(fd);
+ if (size == 0)
+ rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo);
+ return size;
+}
+
+static const char *
+get_hugepage_dir(uint64_t hugepage_sz)
+{
+ enum proc_mount_fieldnames {
+ DEVICE = 0,
+ MOUNTPT,
+ FSTYPE,
+ OPTIONS,
+ _FIELDNAME_MAX
+ };
+ static uint64_t default_size = 0;
+ const char proc_mounts[] = "/proc/mounts";
+ const char hugetlbfs_str[] = "hugetlbfs";
+ const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
+ const char pagesize_opt[] = "pagesize=";
+ const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
+ const char split_tok = ' ';
+ char *splitstr[_FIELDNAME_MAX];
+ char buf[BUFSIZ];
+ char *retval = NULL;
+
+ FILE *fd = fopen(proc_mounts, "r");
+ if (fd == NULL)
+ rte_panic("Cannot open %s\n", proc_mounts);
+
+ if (default_size == 0)
+ default_size = get_default_hp_size();
+
+ while (fgets(buf, sizeof(buf), fd)){
+ if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
+ split_tok) != _FIELDNAME_MAX) {
+ RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
+ break; /* return NULL */
+ }
+
+ /* we have a specified --huge-dir option, only examine that dir */
+ if (internal_config.hugepage_dir != NULL &&
+ strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0)
+ continue;
+
+ if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
+ const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
+
+ /* if no explicit page size, the default page size is compared */
+ if (pagesz_str == NULL){
+ if (hugepage_sz == default_size){
+ retval = strdup(splitstr[MOUNTPT]);
+ break;
+ }
+ }
+ /* there is an explicit page size, so check it */
+ else {
+ uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
+ if (pagesz == hugepage_sz) {
+ retval = strdup(splitstr[MOUNTPT]);
+ break;
+ }
+ }
+ } /* end if strncmp hugetlbfs */
+ } /* end while fgets */
+
+ fclose(fd);
+ return retval;
+}
+
+static inline void
+swap_hpi(struct hugepage_info *a, struct hugepage_info *b)
+{
+ char buf[sizeof(*a)];
+ memcpy(buf, a, sizeof(*a));
+ memcpy(a, b, sizeof(*a));
+ memcpy(b, buf, sizeof(*a));
+}
+
+int
+eal_hugepage_info_init(void)
+{
+ const char dirent_start_text[] = "hugepages-";
+ const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
+ unsigned i, num_sizes = 0;
+
+ DIR *dir = opendir(sys_dir_path);
+ if (dir == NULL)
+ rte_panic("Cannot open directory %s to read system hugepage info\n",
+ sys_dir_path);
+
+ struct dirent *dirent = readdir(dir);
+ while(dirent != NULL){
+ if (strncmp(dirent->d_name, dirent_start_text, dirent_start_len) == 0){
+ struct hugepage_info *hpi = \
+ &internal_config.hugepage_info[num_sizes];
+ hpi->hugepage_sz = rte_str_to_size(&dirent->d_name[dirent_start_len]);
+ hpi->num_pages = get_num_hugepages(dirent->d_name);
+ hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
+ if (hpi->hugedir == NULL){
+ RTE_LOG(INFO, EAL, "%u hugepages of size %llu reserved, "\
+ "but no mounted hugetlbfs found for that size\n",
+ hpi->num_pages,
+ (unsigned long long)hpi->hugepage_sz);
+ hpi->num_pages = 0;
+ } else
+ num_sizes++;
+ }
+ dirent = readdir(dir);
+ }
+ closedir(dir);
+ internal_config.num_hugepage_sizes = num_sizes;
+
+ /* sort the page directory entries by size, largest to smallest */
+ for (i = 0; i < num_sizes; i++){
+ unsigned j;
+ for (j = i+1; j < num_sizes; j++)
+ if (internal_config.hugepage_info[j-1].hugepage_sz < \
+ internal_config.hugepage_info[j].hugepage_sz)
+ swap_hpi(&internal_config.hugepage_info[j-1],
+ &internal_config.hugepage_info[j]);
+ }
+
+ /* now we have all info, check we have at least one valid size */
+ for (i = 0; i < num_sizes; i++)
+ if (internal_config.hugepage_info[i].hugedir != NULL &&
+ internal_config.hugepage_info[i].num_pages > 0)
+ return 0;
+ /* no valid hugepage mounts available, return error */
+ return -1;
+}
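The array filled in here, internal_config.hugepage_info[], is consumed elsewhere in the EAL (for example by eal_get_hugepage_mem_size() in eal.c above). A small illustrative sketch of walking it, using the same fields and casts as the code above:

/* Illustrative only: walk the hugepage sizes discovered above. */
#include <stdio.h>
#include "eal_internal_cfg.h"
#include "eal_hugepages.h"

static void
dump_hugepage_info(void)
{
	unsigned i;

	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		const struct hugepage_info *hpi = &internal_config.hugepage_info[i];
		printf("%u pages of size %llu bytes, mounted at %s\n",
				hpi->num_pages,
				(unsigned long long)hpi->hugepage_sz,
				hpi->hugedir == NULL ? "(none)" : hpi->hugedir);
	}
}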
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
new file mode 100644
index 0000000..8ff2289
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -0,0 +1,540 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sys/epoll.h>
+#include <sys/signalfd.h>
+
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_pci.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+
+#include "eal_private.h"
+
+#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
+
+/**
+ * union for pipe fds.
+ */
+union intr_pipefds{
+ struct {
+ int pipefd[2];
+ };
+ struct {
+ int readfd;
+ int writefd;
+ };
+};
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ int uio_intr_count; /* for uio device */
+ uint64_t timerfd_num; /* for timerfd */
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* union buffer for pipe read/write */
+static union intr_pipefds intr_pipe;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+int
+rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg)
+{
+ int ret = -1;
+ struct rte_intr_source *src;
+ int wake_thread = 0;
+
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ /* allocate a new interrupt callback entity */
+ struct rte_intr_callback *callback =
+ rte_zmalloc("interrupt callback list",
+ sizeof(*callback), 0);
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ return -ENOMEM;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if there is at least one callback registered for the fd */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd) {
+ if (src->callbacks.tqh_first == NULL)
+ /* we had no interrupts for this */
+ wake_thread = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ break;
+ }
+
+ /* No callback registered for this fd */
+ if (src == NULL){
+ /* no existing callbacks for this - add new source */
+ src = rte_zmalloc("interrupt source list", sizeof(*src), 0);
+ if (src == NULL){
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ wake_thread = 1;
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+ /**
+ * check whether we need to notify the pipe fd that epoll_wait is
+ * waiting on, so that it rebuilds its wait list.
+ */
+ if (wake_thread)
+ if (write(intr_pipe.writefd, "1", 1) < 0)
+ return -EPIPE;
+
+ return 0;
+
+error:
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+}
+
+int
+rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg)
+{
+ int ret = -1;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb;
+
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check whether an interrupt source already exists for this fd */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+ goto error;
+ }
+
+ ret = 0;
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+ if (cb->cb_fn != cb_fn)
+ continue;
+ if (cb_arg == (void *)-1 || cb->cb_arg == cb_arg) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ rte_free(cb);
+ ret ++;
+ }
+
+ if (src->callbacks.tqh_first == NULL) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ rte_free(src);
+ }
+ }
+
+ /* notify the pipe fd that epoll_wait waits on, so it rebuilds the wait list */
+ if (write(intr_pipe.writefd, "1", 1) < 0) {
+ ret = -EPIPE;
+ goto error;
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+
+error:
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+}
+
+int
+rte_intr_enable(struct rte_intr_handle *intr_handle)
+{
+ const int value = 1;
+
+ if (!intr_handle || intr_handle->fd < 0)
+ return -1;
+
+ switch (intr_handle->type){
+ /* write to the uio fd to enable the interrupt */
+ case RTE_INTR_HANDLE_UIO:
+ if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Error enabling interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+	/* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_intr_disable(struct rte_intr_handle *intr_handle)
+{
+ const int value = 0;
+
+ if (!intr_handle || intr_handle->fd < 0)
+ return -1;
+
+ switch (intr_handle->type){
+ /* write to the uio fd to disable the interrupt */
+ case RTE_INTR_HANDLE_UIO:
+ if (write(intr_handle->fd, &value, sizeof(value)) < 0){
+ RTE_LOG(ERR, EAL,
+				"Error disabling interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+	/* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+eal_intr_process_interrupts(struct epoll_event *events, int nfds)
+{
+ int n, i, active_cb, bytes_read;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb;
+ union rte_intr_read_buffer buf;
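+	/* local snapshot of one source's callbacks (fixed capacity of 32) */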
+ struct rte_intr_callback active_cbs[32];
+
+ for (n = 0; n < nfds; n++) {
+ /**
+		 * if the pipe fd is ready to read, return so that the caller
+		 * can rebuild the wait list.
+ */
+ if (events[n].data.fd == intr_pipe.readfd){
+ int r = read(intr_pipe.readfd, buf.charbuf,
+ sizeof(buf.charbuf));
+ RTE_SET_USED(r);
+ return -1;
+ }
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd ==
+ events[n].data.fd)
+ break;
+ if (src == NULL){
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* for this source, make a copy of all the callbacks,
+ * then unlock the lock, so the callbacks can
+ * themselves manipulate the list for future
+ * instances.
+ */
+ active_cb = 0;
+ memset(active_cbs, 0, sizeof(active_cbs));
+ TAILQ_FOREACH(cb, &src->callbacks, next)
+ active_cbs[active_cb++] = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+		/* set the length to be read for the different handle types */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ bytes_read = 4;
+ break;
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = sizeof(uint64_t);
+ break;
+ default:
+ bytes_read = 1;
+ break;
+ }
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(events[n].data.fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ RTE_LOG(ERR, EAL, "Error reading from file descriptor"
+ " %d, error: %d\n", events[n].data.fd, errno);
+ continue;
+ }
+ else if (bytes_read == 0) {
+ RTE_LOG(ERR, EAL,
+ "Read nothing from file descriptor %d.\n",
+ events[n].data.fd);
+ continue;
+ }
+ /**
+ * Finally, call all callbacks from the copy
+ * we made earlier.
+ */
+ for (i = 0; i < active_cb; i++) {
+ if (active_cbs[i].cb_fn == NULL)
+ continue;
+ active_cbs[i].cb_fn(&src->intr_handle,
+ active_cbs[i].cb_arg);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * It handles all the interrupts.
+ *
+ * @param pfd
+ * epoll file descriptor.
+ * @param totalfds
+ * The number of file descriptors added in epoll.
+ *
+ * @return
+ * void
+ */
+static void
+eal_intr_handle_interrupts(int pfd, unsigned totalfds)
+{
+ struct epoll_event events[totalfds];
+ int nfds = 0;
+
+ for(;;) {
+ nfds = epoll_wait(pfd, events, totalfds,
+ EAL_INTR_EPOLL_WAIT_FOREVER);
+		/* epoll_wait failed */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+				"epoll_wait failed\n");
+ return;
+ }
+		/* epoll_wait timed out; should not happen with an infinite timeout */
+ else if (nfds == 0)
+ continue;
+ /* epoll_wait has at least one fd ready to read */
+ if (eal_intr_process_interrupts(events, nfds) < 0)
+ return;
+ }
+}
+
+/**
+ * It builds/rebuilds the epoll file descriptor with all the
+ * file descriptors being waited on. Then handles the interrupts.
+ *
+ * @param arg
+ * pointer. (unused)
+ *
+ * @return
+ *  Never returns.
+ */
+static __attribute__((noreturn)) void *
+eal_intr_thread_main(__rte_unused void *arg)
+{
+ struct epoll_event ev;
+
+ /* host thread, never break out */
+ for (;;) {
+ /* build up the epoll fd with all descriptors we are to
+		 * wait on, then pass it to the handle_interrupts function
+ */
+ static struct epoll_event pipe_event = {
+ .events = EPOLLIN | EPOLLPRI,
+ };
+ struct rte_intr_source *src;
+ unsigned numfds = 0;
+
+ /* create epoll fd */
+ int pfd = epoll_create(1);
+ if (pfd < 0)
+ rte_panic("Cannot create epoll instance\n");
+
+ pipe_event.data.fd = intr_pipe.readfd;
+ /**
+		 * add the pipe fd to the wait list; writing to this pipe
+		 * signals that the wait list must be rebuilt.
+ */
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
+ &pipe_event) < 0) {
+ rte_panic("Error adding fd to %d epoll_ctl, %s\n",
+ intr_pipe.readfd, strerror(errno));
+ }
+ numfds++;
+
+ rte_spinlock_lock(&intr_lock);
+
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->callbacks.tqh_first == NULL)
+ continue; /* skip those with no callbacks */
+ ev.events = EPOLLIN | EPOLLPRI;
+ ev.data.fd = src->intr_handle.fd;
+
+ /**
+			 * add each uio device file descriptor
+			 * to the wait list.
+ */
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD,
+ src->intr_handle.fd, &ev) < 0){
+ rte_panic("Error adding fd %d epoll_ctl, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ }
+ else
+ numfds++;
+ }
+ rte_spinlock_unlock(&intr_lock);
+ /* serve the interrupt */
+ eal_intr_handle_interrupts(pfd, numfds);
+
+ /**
+ * when we return, we need to rebuild the
+ * list of fds to monitor.
+ */
+ close(pfd);
+ }
+}
+
+int
+rte_eal_intr_init(void)
+{
+ int ret = 0;
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ /**
+	 * create a pipe that epoll waits on; writing to it notifies the
+	 * interrupt thread to rebuild its epoll wait list.
+ */
+ if (pipe(intr_pipe.pipefd) < 0)
+ return -1;
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = pthread_create(&intr_thread, NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+
+ return -ret;
+}
+
diff --git a/lib/librte_eal/linuxapp/eal/eal_lcore.c b/lib/librte_eal/linuxapp/eal/eal_lcore.c
new file mode 100644
index 0000000..dde9bc1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_lcore.c
@@ -0,0 +1,192 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+
+#define PROC_CPUINFO "/proc/cpuinfo"
+#define PROC_PROCESSOR_FMT ""
+
+/* parse one line and try to match "processor : %d". */
+static int
+parse_processor_id(const char *buf, unsigned *lcore_id)
+{
+ static const char _processor[] = "processor";
+ const char *s;
+
+ if (strncmp(buf, _processor, sizeof(_processor) - 1) != 0)
+ return -1;
+
+ s = strchr(buf, ':');
+ if (s == NULL)
+ return -1;
+
+ errno = 0;
+ *lcore_id = strtoul(s+1, NULL, 10);
+ if (errno != 0) {
+ *lcore_id = -1;
+ return -1;
+ }
+
+ return 0;
+}
+
+/* parse one line and try to match "physical id : %d". */
+static int
+parse_socket_id(const char *buf, unsigned *socket_id)
+{
+ static const char _physical_id[] = "physical id";
+ const char *s;
+
+ if (strncmp(buf, _physical_id, sizeof(_physical_id) - 1) != 0)
+ return -1;
+
+ s = strchr(buf, ':');
+ if (s == NULL)
+ return -1;
+
+ errno = 0;
+ *socket_id = strtoul(s+1, NULL, 10);
+ if (errno != 0)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Parse /proc/cpuinfo to detect the logical cores (lcores) present on
+ * the machine and the socket each of them belongs to. The function
+ * fills the lcore configuration in the global configuration.
+ */
+int
+rte_eal_cpu_init(void)
+{
+ struct rte_config *config;
+ FILE *f;
+ char buf[BUFSIZ];
+ unsigned lcore_id = 0;
+ unsigned socket_id = 0;
+ unsigned count = 0;
+
+ /* get pointer to global configuration */
+ config = rte_eal_get_configuration();
+
+ /* open /proc/cpuinfo */
+ f = fopen(PROC_CPUINFO, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot find "PROC_CPUINFO"\n", __func__);
+ return -1;
+ }
+
+ /*
+	 * browse lines of /proc/cpuinfo and fill the lcore entries in the
+	 * global configuration
+ */
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+
+ if (parse_processor_id(buf, &lcore_id) == 0)
+ continue;
+
+ if (parse_socket_id(buf, &socket_id) == 0)
+ continue;
+
+ if (buf[0] == '\n') {
+ RTE_LOG(DEBUG, EAL, "Detected lcore %u on socket %u\n",
+ lcore_id, socket_id);
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(DEBUG, EAL,
+ "Skip lcore %u >= RTE_MAX_LCORE\n",
+ lcore_id);
+ continue;
+ }
+
+ /*
+ * In a virtualization environment, the socket ID
+ * reported by the system may not be linked to a real
+ * physical socket ID, and may be incoherent. So in this
+ * case, a default socket ID of 0 is assigned.
+ */
+ if (socket_id >= RTE_MAX_NUMA_NODES) {
+#ifdef CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID
+ socket_id = 0;
+#else
+ rte_panic("Socket ID (%u) is greater than "
+ "RTE_MAX_NUMA_NODES (%d)\n",
+ socket_id, RTE_MAX_NUMA_NODES);
+#endif
+ }
+
+ lcore_config[lcore_id].detected = 1;
+ lcore_config[lcore_id].socket_id = socket_id;
+
+ }
+ }
+
+ fclose(f);
+
+ /* disable lcores that were not detected */
+ RTE_LCORE_FOREACH(lcore_id) {
+
+ if (lcore_config[lcore_id].detected == 0) {
+ RTE_LOG(DEBUG, EAL, "Skip lcore %u (not detected)\n",
+ lcore_id);
+ config->lcore_role[lcore_id] = ROLE_OFF;
+ }
+ else
+ count ++;
+ }
+
+ config->lcore_count = count;
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
new file mode 100644
index 0000000..3d3d7ba
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -0,0 +1,137 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+/*
+ * default log function, used once mempool (hence log history) is
+ * available
+ */
+static ssize_t
+console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+ char copybuf[BUFSIZ + 1];
+ ssize_t ret;
+ uint32_t loglevel;
+
+ /* add this log in history */
+ rte_log_add_in_history(buf, size);
+
+ /* write on stdout */
+ ret = fwrite(buf, 1, size, stdout);
+ fflush(stdout);
+
+ /* truncate message if too big (should not happen) */
+ if (size > BUFSIZ)
+ size = BUFSIZ;
+
+ /* Syslog error levels are from 0 to 7, so subtract 1 to convert */
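+	/* e.g. RTE_LOG_ERR (4) becomes the syslog level LOG_ERR (3) */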
+ loglevel = rte_log_cur_msg_loglevel() - 1;
+ memcpy(copybuf, buf, size);
+ copybuf[size] = '\0';
+
+ /* write on syslog too */
+ syslog(loglevel, "%s", copybuf);
+
+ return ret;
+}
+
+static ssize_t
+console_log_read(__attribute__((unused)) void *c,
+ __attribute__((unused)) char *buf,
+ __attribute__((unused)) size_t size)
+{
+ return 0;
+}
+
+static int
+console_log_seek(__attribute__((unused)) void *c,
+ __attribute__((unused)) off64_t *offset,
+ __attribute__((unused)) int whence)
+{
+ return -1;
+}
+
+static int
+console_log_close(__attribute__((unused)) void *c)
+{
+ return 0;
+}
+
+static cookie_io_functions_t console_log_func = {
+ .read = console_log_read,
+ .write = console_log_write,
+ .seek = console_log_seek,
+ .close = console_log_close
+};
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+int
+rte_eal_log_init(void)
+{
+ FILE *log_stream;
+
+ log_stream = fopencookie(NULL, "w+", console_log_func);
+ if (log_stream == NULL)
+ return -1;
+
+ openlog("rte", LOG_NDELAY | LOG_PID, LOG_DAEMON);
+
+ if (rte_eal_common_log_init(log_stream) < 0)
+ return -1;
+
+ return 0;
+}
+
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
new file mode 100644
index 0000000..a47dab4
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -0,0 +1,796 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_fs_paths.h"
+#include "eal_hugepages.h"
+
+/**
+ * @file
+ * Huge page mapping under linux
+ *
+ * To reserve a big contiguous amount of memory, we use the hugepage
+ * feature of linux. For that, we need to have hugetlbfs mounted. This
+ * code will create many files in this directory (one per page) and
+ * map them in virtual memory. For each page, we will retrieve its
+ * physical address and remap it in order to have a virtual contiguous
+ * zone as well as a physical contiguous zone.
+ */
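+
+/*
+ * Note: hugetlbfs is typically mounted beforehand with a command such as
+ * "mount -t hugetlbfs nodev /mnt/huge".
+ */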
+
+
+#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
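+/* e.g. "sysctl -w kernel.randomize_va_space=0" disables ASLR system-wide */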
+
+/*
+ * Check whether address-space layout randomization is enabled in
+ * the kernel. This is important for multi-process, as it can prevent
+ * two processes from mapping data to the same virtual address.
+ * Returns:
+ * 0 - address space randomization disabled
+ * 1/2 - address space randomization enabled
+ * negative error code on error
+ */
+static int
+aslr_enabled(void)
+{
+ char c;
+ int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
+ if (fd < 0)
+ return -errno;
+ retval = read(fd, &c, 1);
+ close(fd);
+ if (retval < 0)
+ return -errno;
+ if (retval == 0)
+ return -EIO;
+ switch (c) {
+ case '0' : return 0;
+ case '1' : return 1;
+ case '2' : return 2;
+ default: return -EINVAL;
+ }
+}
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by hugepage_sz until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of hugepage size.
+ */
+static void *
+get_virtual_area(uint64_t *size, uint64_t hugepage_sz)
+{
+ void *addr;
+ int fd;
+ long aligned_addr;
+
+ RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%"PRIx64" bytes\n", *size);
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd < 0){
+ RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+ return NULL;
+ }
+ do {
+ addr = mmap(NULL, (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED)
+ *size -= hugepage_sz;
+ } while (addr == MAP_FAILED && *size > 0);
+
+ if (addr == MAP_FAILED) {
+ close(fd);
+ RTE_LOG(INFO, EAL, "Cannot get a virtual area\n");
+ return NULL;
+ }
+
+ munmap(addr, (*size) + hugepage_sz);
+ close(fd);
+
+ /* align addr to a huge page size boundary */
+ aligned_addr = (long)addr;
+ aligned_addr += (hugepage_sz - 1);
+ aligned_addr &= (~(hugepage_sz - 1));
+ addr = (void *)(aligned_addr);
+
+ RTE_LOG(INFO, EAL, "Virtual area found at %p (size = 0x%"PRIx64")\n",
+ addr, *size);
+
+ return addr;
+}
+
+/*
+ * Mmap all hugepages of hugepage table: it first opens a file in
+ * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
+ * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
+ * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
+ * map contiguous physical blocks in contiguous virtual blocks.
+ */
+static int
+map_all_hugepages(struct hugepage *hugepg_tbl,
+ struct hugepage_info *hpi, int orig)
+{
+ int fd;
+ unsigned i;
+ void *virtaddr;
+ void *vma_addr = NULL;
+ uint64_t vma_len = 0;
+
+ for (i = 0; i < hpi->num_pages; i++) {
+ uint64_t hugepage_sz = hpi->hugepage_sz;
+
+ if (orig) {
+ hugepg_tbl[i].file_id = i;
+ hugepg_tbl[i].size = hugepage_sz;
+ eal_get_hugefile_path(hugepg_tbl[i].filepath,
+ sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+ hugepg_tbl[i].file_id);
+ hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
+ }
+#ifndef RTE_ARCH_X86_64
+ /* for 32-bit systems, don't remap 1G pages, just reuse original
+ * map address as final map address.
+ */
+ else if (hugepage_sz == RTE_PGSIZE_1G){
+ hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+ hugepg_tbl[i].orig_va = NULL;
+ continue;
+ }
+#endif
+ else if (vma_len == 0) {
+ unsigned j, num_pages;
+
+ /* reserve a virtual area for next contiguous
+ * physical block: count the number of
+ * contiguous physical pages. */
+ for (j = i+1; j < hpi->num_pages ; j++) {
+ if (hugepg_tbl[j].physaddr !=
+ hugepg_tbl[j-1].physaddr + hugepage_sz)
+ break;
+ }
+ num_pages = j - i;
+ vma_len = num_pages * hugepage_sz;
+
+ /* get the biggest virtual memory area up to
+ * vma_len. If it fails, vma_addr is NULL, so
+ * let the kernel provide the address. */
+ vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+ if (vma_addr == NULL)
+ vma_len = hugepage_sz;
+ }
+
+ fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s", __func__,
+ strerror(errno));
+ return -1;
+ }
+
+ virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (virtaddr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s(): mmap failed: %s", __func__,
+ strerror(errno));
+ close(fd);
+ return -1;
+ }
+ if (orig) {
+ hugepg_tbl[i].orig_va = virtaddr;
+ memset(virtaddr, 0, hugepage_sz);
+ }
+ else {
+ hugepg_tbl[i].final_va = virtaddr;
+ }
+
+ vma_addr = (char *)vma_addr + hugepage_sz;
+ vma_len -= hugepage_sz;
+ close(fd);
+ }
+ return 0;
+}
+
+/* Unmap all hugepages from original mapping. */
+static int
+unmap_all_hugepages_orig(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned i;
+ for (i = 0; i < hpi->num_pages; i++) {
+ if (hugepg_tbl[i].orig_va) {
+ munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
+ hugepg_tbl[i].orig_va = NULL;
+ }
+ }
+ return 0;
+}
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value. We find
+ * it by browsing the /proc/self/pagemap special file.
+ */
+static int
+find_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+{
+ int fd;
+ unsigned i;
+ uint64_t page;
+ unsigned long virt_pfn;
+ int page_size;
+
+ /* standard page size */
+ page_size = getpagesize();
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ for (i = 0; i < hpi->num_pages; i++) {
+ off_t offset;
+ virt_pfn = (unsigned long)hugepg_tbl[i].orig_va /
+ page_size;
+ offset = sizeof(uint64_t) * virt_pfn;
+ if (lseek(fd, offset, SEEK_SET) != offset){
+ RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ if (read(fd, &page, sizeof(uint64_t)) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ /*
+		 * the pfn (page frame number) is in bits 0-54 (see
+ * pagemap.txt in linux Documentation)
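+		 * Mask the pfn out of the entry and multiply by the page size
+		 * to get the physical address, e.g. a pfn of 0x12345 with
+		 * 4 kB pages gives the physical address 0x12345000.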
+ */
+ hugepg_tbl[i].physaddr = ((page & 0x7fffffffffffffULL) * page_size);
+ }
+ close(fd);
+ return 0;
+}
+
+/*
+ * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
+ * page.
+ */
+static int
+find_numasocket(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+{
+ int socket_id;
+ char *end, *nodestr;
+ unsigned i, hp_count = 0;
+ uint64_t virt_addr;
+ char buf[BUFSIZ];
+ char hugedir_str[PATH_MAX];
+ FILE *f;
+
+ f = fopen("/proc/self/numa_maps", "r");
+ if (f == NULL) {
+		RTE_LOG(INFO, EAL, "cannot open /proc/self/numa_maps, "
+				"consider that all memory is in socket_id 0\n");
+ return 0;
+ }
+
+ rte_snprintf(hugedir_str, sizeof(hugedir_str),
+ "%s/", hpi->hugedir);
+
+ /* parse numa map */
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+
+ /* ignore non huge page */
+ if (strstr(buf, " huge ") == NULL &&
+ strstr(buf, hugedir_str) == NULL)
+ continue;
+
+ /* get zone addr */
+ virt_addr = strtoull(buf, &end, 16);
+ if (virt_addr == 0 || end == buf) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+
+ /* get node id (socket id) */
+ nodestr = strstr(buf, " N");
+ if (nodestr == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+ nodestr += 2;
+ end = strstr(nodestr, "=");
+ if (end == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+ end[0] = '\0';
+ end = NULL;
+
+ socket_id = strtoul(nodestr, &end, 0);
+ if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+
+ /* if we find this page in our mappings, set socket_id */
+ for (i = 0; i < hpi->num_pages; i++) {
+ void *va = (void *)(unsigned long)virt_addr;
+ if (hugepg_tbl[i].orig_va == va) {
+ hugepg_tbl[i].socket_id = socket_id;
+ hp_count++;
+ }
+ }
+ }
+ if (hp_count < hpi->num_pages)
+ goto error;
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+/*
+ * Sort the hugepg_tbl by physical address (lower addresses first). We
+ * use a slow algorithm, but we won't have millions of pages, and this
+ * is only done at init time.
+ */
+static int
+sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned i, j;
+ int smallest_idx;
+ uint64_t smallest_addr;
+ struct hugepage tmp;
+
+ for (i = 0; i < hpi->num_pages; i++) {
+ smallest_addr = 0;
+ smallest_idx = -1;
+
+ /*
+ * browse all entries starting at 'i', and find the
+ * entry with the smallest addr
+ */
+ for (j=i; j<hpi->num_pages; j++) {
+
+ if (smallest_addr == 0 ||
+ hugepg_tbl[j].physaddr < smallest_addr) {
+ smallest_addr = hugepg_tbl[j].physaddr;
+ smallest_idx = j;
+ }
+ }
+
+ /* should not happen */
+ if (smallest_idx == -1) {
+ RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", __func__);
+ return -1;
+ }
+
+ /* swap the 2 entries in the table */
+ memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage));
+ memcpy(&hugepg_tbl[smallest_idx], &hugepg_tbl[i],
+ sizeof(struct hugepage));
+ memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage));
+ }
+ return 0;
+}
+
+/*
+ * Uses mmap to create a shared memory area for storage of data.
+ * Used in this file to store the hugepage file map on disk.
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ void *retval;
+ int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ return retval;
+}
+
+/*
+ * This function takes in the list of hugepage sizes and the number
+ * of pages thereof, and calculates the best number of pages of each
+ * size to fulfill the request for <memory> bytes of RAM.
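+ * Returns the total number of pages used to fulfill the request, or -1
+ * if no hugepage sizes were given.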
+ */
+static int
+calc_num_pages(uint64_t memory,
+ struct hugepage_info *hp_info,
+ struct hugepage_info *hp_used,
+ unsigned num_hp_info)
+{
+ unsigned i = 0;
+ int total_num_pages = 0;
+ if (num_hp_info == 0)
+ return -1;
+
+ for (i = 0; i < num_hp_info; i++){
+ hp_used[i].hugepage_sz = hp_info[i].hugepage_sz;
+ hp_used[i].hugedir = hp_info[i].hugedir;
+ hp_used[i].num_pages = RTE_MIN(memory / hp_info[i].hugepage_sz,
+ hp_info[i].num_pages);
+
+ memory -= hp_used[i].num_pages * hp_used[i].hugepage_sz;
+ total_num_pages += hp_used[i].num_pages;
+
+ /* check if we have met all memory requests */
+ if (memory == 0)
+ break;
+		/* if there are no more pages left at this size,
+		 * move on to the next size */
+ if (hp_used[i].num_pages == hp_info[i].num_pages)
+ continue;
+		/* At this point pages of this size are still available, but the
+		 * remaining request is smaller than one such page, so let's see
+		 * if we can satisfy it from the remaining page sizes.
+ */
+ unsigned j;
+ uint64_t remaining_mem = 0;
+ for (j = i+1; j < num_hp_info; j++)
+ remaining_mem += hp_info[j].hugepage_sz * hp_info[j].num_pages;
+
+		/* if the remaining sizes cannot cover it, allocate another
+		 * page of this size and quit */
+ if (remaining_mem < memory){
+ memory -= hp_info[i].hugepage_sz;
+ hp_used[i].num_pages++;
+ total_num_pages++;
+ break; /* we are done */
+ }
+ }
+ return total_num_pages;
+}
+
+/*
+ * Prepare physical memory mapping: fill the configuration structure with
+ * this information and return 0 on success.
+ * 1. map N huge pages in separate files in hugetlbfs
+ * 2. find associated physical addr
+ * 3. find associated NUMA socket ID
+ * 4. sort all huge pages by physical address
+ * 5. remap these N huge pages in the correct order
+ * 6. unmap the first mapping
+ * 7. fill memsegs in configuration with contiguous zones
+ */
+static int
+rte_eal_hugepage_init(void)
+{
+ struct rte_mem_config *mcfg;
+ struct hugepage *hugepage;
+ struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+ int i, j, new_memseg;
+ int nrpages;
+ void *addr;
+
+ memset(used_hp, 0, sizeof(used_hp));
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* for debug purposes, hugetlbfs can be disabled */
+ if (internal_config.no_hugetlbfs) {
+ addr = malloc(internal_config.memory);
+ mcfg->memseg[0].phys_addr = (unsigned long)addr;
+ mcfg->memseg[0].addr = addr;
+ mcfg->memseg[0].len = internal_config.memory;
+ mcfg->memseg[0].socket_id = 0;
+ return 0;
+ }
+
+ nrpages = calc_num_pages(internal_config.memory,
+ &internal_config.hugepage_info[0], &used_hp[0],
+ internal_config.num_hugepage_sizes);
+ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i++)
+ RTE_LOG(INFO, EAL, "Requesting %u pages of size %"PRIu64"\n",
+ used_hp[i].num_pages, used_hp[i].hugepage_sz);
+
+ hugepage = create_shared_memory(eal_hugepage_info_path(),
+ nrpages * sizeof(struct hugepage));
+ if (hugepage == NULL)
+ return -1;
+ memset(hugepage, 0, nrpages * sizeof(struct hugepage));
+
+ unsigned hp_offset = 0; /* where we start the current page size entries */
+ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
+ struct hugepage_info *hpi = &used_hp[i];
+ if (hpi->num_pages == 0)
+ continue;
+
+ if (map_all_hugepages(&hugepage[hp_offset], hpi, 1) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ if (find_physaddr(&hugepage[hp_offset], hpi) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ if (find_numasocket(&hugepage[hp_offset], hpi) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ if (sort_by_physaddr(&hugepage[hp_offset], hpi) < 0)
+ goto fail;
+
+ if (map_all_hugepages(&hugepage[hp_offset], hpi, 0) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ if (unmap_all_hugepages_orig(&hugepage[hp_offset], hpi) < 0)
+ goto fail;
+
+		/* we have processed all hugepages of this size, so advance the offset */
+ hp_offset += hpi->num_pages;
+ }
+
+ memset(mcfg->memseg, 0, sizeof(mcfg->memseg));
+ j = -1;
+ for (i = 0; i < nrpages; i++) {
+ new_memseg = 0;
+
+ /* if this is a new section, create a new memseg */
+ if (i == 0)
+ new_memseg = 1;
+ else if (hugepage[i].socket_id != hugepage[i-1].socket_id)
+ new_memseg = 1;
+ else if (hugepage[i].size != hugepage[i-1].size)
+ new_memseg = 1;
+ else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
+ hugepage[i].size)
+ new_memseg = 1;
+ else if (((unsigned long)hugepage[i].final_va -
+ (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)
+ new_memseg = 1;
+
+ if (new_memseg) {
+ j += 1;
+ if (j == RTE_MAX_MEMSEG)
+ break;
+
+ mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].addr = hugepage[i].final_va;
+ mcfg->memseg[j].len = hugepage[i].size;
+ mcfg->memseg[j].socket_id = hugepage[i].socket_id;
+ mcfg->memseg[j].hugepage_sz = hugepage[i].size;
+ }
+ /* continuation of previous memseg */
+ else {
+ mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
+ }
+ hugepage[i].memseg_id = j;
+ }
+
+ return 0;
+
+
+ fail:
+ return -1;
+}
+
+/*
+ * uses fstat to report the size of a file on disk
+ */
+static off_t
+getFileSize(int fd)
+{
+ struct stat st;
+ if (fstat(fd, &st) < 0)
+ return 0;
+ return st.st_size;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration and finds the hugepages which form that segment, mapping them
+ * in order to form a contiguous block in the virtual memory space
+ */
+static int
+rte_eal_hugepage_attach(void)
+{
+ const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ const struct hugepage *hp = NULL;
+ unsigned num_hp = 0;
+ unsigned i, s = 0; /* s used to track the segment number */
+ off_t size;
+ int fd, fd_zero = -1, fd_hugepage = -1;
+
+ if (aslr_enabled() > 0) {
+ RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
+ "(ASLR) is enabled in the kernel.\n");
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory "
+ "into secondary processes\n");
+ }
+
+ fd_zero = open("/dev/zero", O_RDONLY);
+ if (fd_zero < 0) {
+ RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
+ goto error;
+ }
+ fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+ goto error;
+ }
+
+ size = getFileSize(fd_hugepage);
+ hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
+	if (hp == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+ goto error;
+ }
+
+ num_hp = size / sizeof(struct hugepage);
+ RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);
+
+ while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
+ void *addr, *base_addr;
+ uintptr_t offset = 0;
+
+		/* fd_zero is mmapped to reserve a contiguous block of virtual
+		 * addresses of the appropriate size, attempting to get the same
+		 * address that the server (primary) process used.
+ */
+ base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
+ PROT_READ, MAP_PRIVATE, fd_zero, 0);
+ if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) {
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+ "in /dev/zero to requested address [%p]\n",
+ (unsigned long long)mcfg->memseg[s].len,
+ mcfg->memseg[s].addr);
+ if (aslr_enabled() > 0)
+ RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel "
+ "and retry running both primary and secondary processes\n");
+ goto error;
+ }
+ /* free memory so we can map the hugepages into the space */
+ munmap(base_addr, mcfg->memseg[s].len);
+
+ /* find the hugepages for this segment and map them
+ * we don't need to worry about order, as the server sorted the
+ * entries before it did the second mmap of them */
+ for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){
+ if (hp[i].memseg_id == (int)s){
+ fd = open(hp[i].filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ hp[i].filepath);
+ goto error;
+ }
+ addr = mmap(RTE_PTR_ADD(base_addr, offset),
+ hp[i].size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ close(fd); /* close file both on success and on failure */
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n",
+ hp[i].filepath);
+ goto error;
+ }
+ offset+=hp[i].size;
+ }
+ }
+ RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
+ (unsigned long long)mcfg->memseg[s].len);
+ s++;
+ }
+ close(fd_zero);
+ close(fd_hugepage);
+ return 0;
+
+error:
+ if (fd_zero >= 0)
+ close(fd_zero);
+ if (fd_hugepage >= 0)
+ close(fd_hugepage);
+ return -1;
+}
+
+static int
+rte_eal_memdevice_init(void)
+{
+ struct rte_config *config;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ config = rte_eal_get_configuration();
+ config->mem_config->nchannel = internal_config.force_nchannel;
+ config->mem_config->nrank = internal_config.force_nrank;
+
+ return 0;
+}
+
+
+/* init memory subsystem */
+int
+rte_eal_memory_init(void)
+{
+ const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ rte_eal_hugepage_init() :
+ rte_eal_hugepage_attach();
+ if (retval < 0)
+ return -1;
+
+ if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
+ return -1;
+
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
new file mode 100644
index 0000000..78687d6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -0,0 +1,770 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <dirent.h>
+#include <limits.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_common.h>
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+
+/**
+ * @file
+ * PCI probing under linux
+ *
+ * This code is used to simulate a PCI probe by parsing information in
+ * sysfs. Moreover, when a registered driver matches a device, the
+ * kernel driver currently bound to it is unbound and replaced by the
+ * igb_uio module, a very minimal userland driver for Intel network
+ * cards that only provides applications with access to the PCI BARs
+ * and enables bus mastering.
+ */
+
+
+#define PROC_MODULES "/proc/modules"
+
+#define IGB_UIO_NAME "igb_uio"
+
+#define UIO_NEWID "/sys/bus/pci/drivers/%s/new_id"
+#define UIO_BIND "/sys/bus/pci/drivers/%s/bind"
+
+/* maximum time to wait for /dev/uioX to appear */
+#define UIO_DEV_WAIT_TIMEOUT 3 /* seconds */
+
+/*
+ * For multi-process we need to reproduce all PCI mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct uio_resource {
+ TAILQ_ENTRY(uio_resource) next;
+
+ struct rte_pci_addr pci_addr;
+ void *addr;
+ char path[PATH_MAX];
+ unsigned long size;
+ unsigned long offset;
+};
+
+TAILQ_HEAD(uio_res_list, uio_resource);
+
+static struct uio_res_list *uio_res_list = NULL;
+static int pci_parse_sysfs_value(const char *filename, unsigned long *val);
+
+/*
+ * Check that a kernel module is loaded. Returns 0 if the module is loaded
+ * or if the parameter is NULL, and -1 if the module is not loaded.
+ */
+static int
+pci_uio_check_module(const char *module_name)
+{
+ FILE *f;
+ unsigned i;
+ char buf[BUFSIZ];
+
+ if (module_name == NULL)
+ return 0;
+
+ f = fopen(PROC_MODULES, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot open "PROC_MODULES"\n");
+ return -1;
+ }
+
+ while(fgets(buf, sizeof(buf), f) != NULL) {
+
+ for (i = 0; i < sizeof(buf) && buf[i] != '\0'; i++) {
+ if (isspace(buf[i]))
+ buf[i] = '\0';
+ }
+
+ if (strncmp(buf, module_name, sizeof(buf)) == 0) {
+ fclose(f);
+ return 0;
+ }
+ }
+ fclose(f);
+ RTE_LOG(ERR, EAL, "Cannot find %s in "PROC_MODULES"\n", module_name);
+ return -1;
+}
+
+/* bind a PCI device to the given kernel module's driver */
+static int
+pci_uio_bind_device(struct rte_pci_device *dev, const char *module_name)
+{
+ FILE *f;
+ int n;
+ char buf[BUFSIZ];
+ char uio_newid[PATH_MAX];
+ char uio_bind[PATH_MAX];
+ struct rte_pci_addr *loc = &dev->addr;
+
+ RTE_LOG(DEBUG, EAL, "bind PCI device "PCI_PRI_FMT" to %s driver\n",
+ loc->domain, loc->bus, loc->devid, loc->function, module_name);
+
+ n = rte_snprintf(uio_newid, sizeof(uio_newid), UIO_NEWID, module_name);
+ if ((n < 0) || (n >= (int)sizeof(uio_newid))) {
+ RTE_LOG(ERR, EAL, "Cannot rte_snprintf uio_newid name\n");
+ return -1;
+ }
+ n = rte_snprintf(uio_bind, sizeof(uio_bind), UIO_BIND, module_name);
+ if ((n < 0) || (n >= (int)sizeof(uio_bind))) {
+ RTE_LOG(ERR, EAL, "Cannot rte_snprintf uio_bind name\n");
+ return -1;
+ }
+
+ n = rte_snprintf(buf, sizeof(buf), "%x %x\n",
+ dev->id.vendor_id, dev->id.device_id);
+ if ((n < 0) || (n >= (int)sizeof(buf))) {
+ RTE_LOG(ERR, EAL, "Cannot rte_snprintf vendor_id/device_id\n");
+ return -1;
+ }
+
+ f = fopen(uio_newid, "w");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot open %s\n", uio_newid);
+ return -1;
+ }
+ if (fwrite(buf, n, 1, f) == 0) {
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+
+ f = fopen(uio_bind, "w");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot open %s\n", uio_bind);
+ return -1;
+ }
+ n = rte_snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+ loc->domain, loc->bus, loc->devid, loc->function);
+ if ((n < 0) || (n >= (int)sizeof(buf))) {
+ RTE_LOG(ERR, EAL, "Cannot rte_snprintf PCI infos\n");
+ fclose(f);
+ return -1;
+ }
+ if (fwrite(buf, n, 1, f) == 0) {
+ fclose(f);
+ return -1;
+ }
+
+ RTE_LOG(DEBUG, EAL, "Device bound\n");
+
+ fclose(f);
+ return 0;
+}
+
+/* map a particular resource from a file */
+static void *
+pci_map_resource(struct rte_pci_device *dev, void *requested_addr, const char *devname,
+ unsigned long offset, unsigned long size)
+{
+ unsigned n;
+ int fd;
+ void *mapaddr;
+
+ /*
+	 * open devname and mmap it: the /dev/uioX node can take some time
+	 * to appear after binding, so retry for a while before returning an error
+ */
+ for (n=0; n<UIO_DEV_WAIT_TIMEOUT*10; n++) {
+ fd = open(devname, O_RDWR);
+ if (fd >= 0)
+ break;
+ if (errno != ENOENT)
+ break;
+ usleep(100000);
+ }
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno));
+ goto fail;
+ }
+
+ /* Map the PCI memory resource of device */
+ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, offset);
+ if (mapaddr == MAP_FAILED ||
+ (requested_addr != NULL && mapaddr != requested_addr)) {
+ RTE_LOG(ERR, EAL, "%s(): cannot mmap %s: %s\n", __func__,
+ devname, strerror(errno));
+ close(fd);
+ goto fail;
+ }
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* save fd if in primary process */
+ dev->intr_handle.fd = fd;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+ } else {
+ /* fd is not needed in slave process, close it */
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ close(fd);
+ }
+
+ RTE_LOG(DEBUG, EAL, "PCI memory mapped at %p\n", mapaddr);
+
+ return mapaddr;
+
+fail:
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ return NULL;
+}
+
+/* map the PCI resource of a PCI device in virtual memory */
+static int
+pci_uio_map_resource(struct rte_pci_device *dev)
+{
+ struct dirent *e;
+ DIR *dir;
+ char dirname[PATH_MAX];
+ char dirname2[PATH_MAX];
+ char filename[PATH_MAX];
+ char devname[PATH_MAX]; /* contains the /dev/uioX */
+ void *mapaddr;
+ unsigned uio_num;
+ unsigned long size, offset;
+ struct rte_pci_addr *loc = &dev->addr;
+ struct uio_resource *uio_res;
+
+ RTE_LOG(DEBUG, EAL, "map PCI resource for device "PCI_PRI_FMT"\n",
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ /* secondary processes - use already recorded details */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+ /* skip this element if it doesn't match our PCI address */
+ if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+ continue;
+
+ if (pci_map_resource(dev, uio_res->addr, uio_res->path, \
+ uio_res->offset, uio_res->size) == uio_res->addr)
+ return 0;
+ else {
+ RTE_LOG(ERR, EAL, "Cannot mmap device resource\n");
+ return -1;
+ }
+ }
+ RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
+ return -1;
+ }
+
+ /* depending on kernel version, uio can be located in uio/uioX
+ * or uio:uioX */
+
+ rte_snprintf(dirname, sizeof(dirname),
+ "/sys/bus/pci/devices/" PCI_PRI_FMT "/uio",
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ dir = opendir(dirname);
+ if (dir == NULL) {
+ /* retry with the parent directory */
+ rte_snprintf(dirname, sizeof(dirname),
+ "/sys/bus/pci/devices/" PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+ dir = opendir(dirname);
+
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+ return -1;
+ }
+ }
+
+ /* take the first file starting with "uio" */
+ while ((e = readdir(dir)) != NULL) {
+ int shortprefix_len = sizeof("uio") - 1; /* format could be uio%d ...*/
+ int longprefix_len = sizeof("uio:uio") - 1; /* ... or uio:uio%d */
+ char *endptr;
+
+ if (strncmp(e->d_name, "uio", 3) != 0)
+ continue;
+
+ /* first try uio%d */
+ errno = 0;
+ uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+ if (errno == 0 && endptr != e->d_name) {
+ rte_snprintf(dirname2, sizeof(dirname2),
+ "%s/uio%u", dirname, uio_num);
+ break;
+ }
+
+ /* then try uio:uio%d */
+ errno = 0;
+ uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+ if (errno == 0 && endptr != e->d_name) {
+ rte_snprintf(dirname2, sizeof(dirname2),
+ "%s/uio:uio%u", dirname, uio_num);
+ break;
+ }
+ }
+ closedir(dir);
+
+ /* No uio resource found */
+ if (e == NULL)
+ return 0;
+
+ /* get mapping offset */
+ rte_snprintf(filename, sizeof(filename),
+ "%s/maps/map0/offset", dirname2);
+ if (pci_parse_sysfs_value(filename, &offset) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse offset\n",
+ __func__);
+ return -1;
+ }
+
+ /* get mapping size */
+ rte_snprintf(filename, sizeof(filename),
+ "%s/maps/map0/size", dirname2);
+ if (pci_parse_sysfs_value(filename, &size) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse size\n",
+ __func__);
+ return -1;
+ }
+
+ /* open and mmap /dev/uioX */
+ rte_snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+ mapaddr = pci_map_resource(dev, NULL, devname, offset, size);
+ if (mapaddr == NULL)
+ return -1;
+ dev->mem_resource.addr = mapaddr;
+
+	/* save the mapping details for secondary processes */
+ uio_res = rte_malloc("UIO_RES", sizeof(*uio_res), 0);
+ if (uio_res == NULL){
+ RTE_LOG(ERR, EAL, "%s(): cannot store uio mmap details\n", __func__);
+ return -1;
+ }
+ uio_res->addr = mapaddr;
+ uio_res->offset = offset;
+ uio_res->size = size;
+ rte_snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+ memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));
+
+ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+ return 0;
+}
+
+/* parse the "resource" sysfs file */
+#define IORESOURCE_MEM 0x00000200
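+/* same flag bit as IORESOURCE_MEM in the kernel's include/linux/ioport.h */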
+
+static int
+pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ union pci_resource_info {
+ struct {
+ char *phys_addr;
+ char *end_addr;
+ char *flags;
+ };
+ char *ptrs[PCI_RESOURCE_FMT_NVAL];
+ } res_info;
+ int i;
+ uint64_t phys_addr, end_addr, flags;
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
+ return -1;
+ }
+
+ for (i = 0; i<PCI_MAX_RESOURCE; i++) {
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read resource\n", __func__);
+ goto error;
+ }
+
+ if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) {
+ RTE_LOG(ERR, EAL, "%s(): bad resource format\n", __func__);
+ goto error;
+ }
+ errno = 0;
+ phys_addr = strtoull(res_info.phys_addr, NULL, 16);
+ end_addr = strtoull(res_info.end_addr, NULL, 16);
+ flags = strtoull(res_info.flags, NULL, 16);
+ if (errno != 0) {
+ RTE_LOG(ERR, EAL, "%s(): bad resource format\n", __func__);
+ goto error;
+ }
+
+ if (flags & IORESOURCE_MEM) {
+ dev->mem_resource.phys_addr = phys_addr;
+ dev->mem_resource.len = end_addr - phys_addr + 1;
+ dev->mem_resource.addr = NULL; /* not mapped for now */
+ break;
+ }
+ }
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+/* parse a sysfs file containing one integer value */
+static int
+pci_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ *val = strtoul(buf, &end, 0);
+ if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+/* Scan one pci sysfs entry, and fill the devices list from it. */
+static int
+pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
+ uint8_t devid, uint8_t function)
+{
+ char filename[PATH_MAX];
+ unsigned long tmp;
+ struct rte_pci_device *dev;
+
+ dev = malloc(sizeof(*dev));
+ if (dev == NULL) {
+ return -1;
+ }
+
+ memset(dev, 0, sizeof(*dev));
+ dev->addr.domain = domain;
+ dev->addr.bus = bus;
+ dev->addr.devid = devid;
+ dev->addr.function = function;
+
+ /* get vendor id */
+ rte_snprintf(filename, sizeof(filename), "%s/vendor", dirname);
+ if (pci_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.vendor_id = (uint16_t)tmp;
+
+ /* get device id */
+ rte_snprintf(filename, sizeof(filename), "%s/device", dirname);
+ if (pci_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.device_id = (uint16_t)tmp;
+
+ /* get subsystem_vendor id */
+ rte_snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
+ dirname);
+ if (pci_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.subsystem_vendor_id = (uint16_t)tmp;
+
+ /* get subsystem_device id */
+ rte_snprintf(filename, sizeof(filename), "%s/subsystem_device",
+ dirname);
+ if (pci_parse_sysfs_value(filename, &tmp) < 0) {
+ free(dev);
+ return -1;
+ }
+ dev->id.subsystem_device_id = (uint16_t)tmp;
+
+ /* parse resources */
+ rte_snprintf(filename, sizeof(filename), "%s/resource", dirname);
+ if (pci_parse_sysfs_resource(filename, dev) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
+ free(dev);
+ return -1;
+ }
+
+ /* device is valid, add in list */
+ TAILQ_INSERT_TAIL(&device_list, dev, next);
+
+ return 0;
+}
+
+/*
+ * split up a pci address into its constituent parts.
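+ * e.g. "0000:02:00.0" gives domain 0x0000, bus 0x02, devid 0x00, function 0.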
+ */
+static int
+parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
+ uint8_t *bus, uint8_t *devid, uint8_t *function)
+{
+ /* first split on ':' */
+ union splitaddr {
+ struct {
+ char *domain;
+ char *bus;
+ char *devid;
+ char *function;
+ };
+ char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */
+ } splitaddr;
+
+ char *buf_copy = strndup(buf, bufsize);
+ if (buf_copy == NULL)
+ return -1;
+
+ if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':')
+ != PCI_FMT_NVAL - 1)
+ goto error;
+ /* final split is on '.' between devid and function */
+ splitaddr.function = strchr(splitaddr.devid,'.');
+ if (splitaddr.function == NULL)
+ goto error;
+ *splitaddr.function++ = '\0';
+
+ /* now convert to int values */
+ errno = 0;
+	*domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
+ *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
+ *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
+ *function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
+ if (errno != 0)
+ goto error;
+
+	free(buf_copy); /* free the copy made with strndup */
+ return 0;
+error:
+ free(buf_copy);
+ return -1;
+}
+
+/*
+ * Scan the content of the PCI bus and add the detected devices to the
+ * devices list
+ */
+static int
+pci_scan(void)
+{
+ struct dirent *e;
+ DIR *dir;
+ char dirname[PATH_MAX];
+ uint16_t domain;
+ uint8_t bus, devid, function;
+
+ dir = opendir(SYSFS_PCI_DEVICES);
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ while ((e = readdir(dir)) != NULL) {
+ if (e->d_name[0] == '.')
+ continue;
+
+ if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain,
+ &bus, &devid, &function) != 0)
+ continue;
+
+ rte_snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES,
+ e->d_name);
+ if (pci_scan_one(dirname, domain, bus, devid, function) < 0)
+ goto error;
+ }
+ closedir(dir);
+ return 0;
+
+error:
+ closedir(dir);
+ return -1;
+}
+
+/* unbind kernel driver for this device */
+static int
+pci_unbind_kernel_driver(struct rte_pci_device *dev)
+{
+ int n;
+ FILE *f;
+ char filename[PATH_MAX];
+ char buf[BUFSIZ];
+ struct rte_pci_addr *loc = &dev->addr;
+
+ /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
+ rte_snprintf(filename, sizeof(filename),
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ RTE_LOG(DEBUG, EAL, "unbind kernel driver %s\n", filename);
+
+ f = fopen(filename, "w");
+ if (f == NULL) /* device was not bound */
+ return 0;
+
+ n = rte_snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+ loc->domain, loc->bus, loc->devid, loc->function);
+ if ((n < 0) || (n >= (int)sizeof(buf))) {
+ RTE_LOG(ERR, EAL, "%s(): rte_snprintf failed\n", __func__);
+ goto error;
+ }
+ if (fwrite(buf, n, 1, f) == 0)
+ goto error;
+
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+/*
+ * If vendor/device ID match, call the devinit() function of the
+ * driver.
+ */
+int
+rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev)
+{
+ struct rte_pci_id *id_table;
+ const char *module_name = NULL;
+ int ret;
+
+ if (dr->drv_flags & RTE_PCI_DRV_NEED_IGB_UIO)
+ module_name = IGB_UIO_NAME;
+
+ ret = pci_uio_check_module(module_name);
+ if (ret != 0)
+ rte_exit(1, "The %s module is required by the %s driver\n",
+ module_name, dr->name);
+
+ for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {
+
+ /* check if device's identifiers match the driver's ones */
+ if (id_table->vendor_id != dev->id.vendor_id &&
+ id_table->vendor_id != PCI_ANY_ID)
+ continue;
+ if (id_table->device_id != dev->id.device_id &&
+ id_table->device_id != PCI_ANY_ID)
+ continue;
+ if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id &&
+ id_table->subsystem_vendor_id != PCI_ANY_ID)
+ continue;
+ if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
+ id_table->subsystem_device_id != PCI_ANY_ID)
+ continue;
+
+ RTE_LOG(DEBUG, EAL, "probe driver: %x:%x %s\n",
+ dev->id.vendor_id, dev->id.device_id, dr->name);
+
+ /* Unbind PCI devices if needed */
+ if (module_name != NULL) {
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* unbind current driver, bind ours */
+ if (pci_unbind_kernel_driver(dev) < 0)
+ return -1;
+ if (pci_uio_bind_device(dev, module_name) < 0)
+ return -1;
+ }
+ /* map the NIC resources */
+ if (pci_uio_map_resource(dev) < 0)
+ return -1;
+ }
+ /* call the driver devinit() function */
+ return dr->devinit(dr, dev);
+
+ }
+ return -1;
+}
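For context, a PMD consumes this probing path by registering a struct rte_pci_driver. The sketch below uses the field names referenced in this file (name, devinit, id_table, drv_flags) plus rte_eal_pci_register() from rte_pci.h, which is not shown in this patch and is therefore an assumption; the vendor/device IDs are hypothetical.

#include <rte_pci.h>

/* Hypothetical vendor/device IDs, for illustration only. */
static struct rte_pci_id my_pci_ids[] = {
        { .vendor_id = 0x8086, .device_id = 0x10fb,
          .subsystem_vendor_id = PCI_ANY_ID,
          .subsystem_device_id = PCI_ANY_ID },
        { .vendor_id = 0 },     /* sentinel: vendor_id == 0 ends the table */
};

static int
my_devinit(struct rte_pci_driver *dr, struct rte_pci_device *dev)
{
        /* driver-specific initialisation of the matched device */
        (void)dr;
        (void)dev;
        return 0;
}

static struct rte_pci_driver my_driver = {
        .name = "my_pmd",
        .devinit = my_devinit,
        .id_table = my_pci_ids,
        .drv_flags = RTE_PCI_DRV_NEED_IGB_UIO,
};

/* somewhere in the PMD init code (assumed API): rte_eal_pci_register(&my_driver); */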
+
+/* Init the PCI EAL subsystem */
+int
+rte_eal_pci_init(void)
+{
+ TAILQ_INIT(&driver_list);
+ TAILQ_INIT(&device_list);
+ uio_res_list = RTE_TAILQ_RESERVE("PCI_RESOURCE_LIST", uio_res_list);
+
+ /* for debug purposes, PCI can be disabled */
+ if (internal_config.no_pci)
+ return 0;
+
+ if (pci_scan() < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
+ return -1;
+ }
+ return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
new file mode 100644
index 0000000..7409d28
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -0,0 +1,237 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/queue.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_per_lcore.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switches to the FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+ int n;
+ char c = 0;
+ int m2s = lcore_config[slave_id].pipe_master2slave[1];
+ int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+ if (lcore_config[slave_id].state != WAIT)
+ return -EBUSY;
+
+ lcore_config[slave_id].f = f;
+ lcore_config[slave_id].arg = arg;
+
+ /* send message */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(m2s, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ /* wait ack */
+ n = 0;
+ do {
+ n = read(s2m, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ return 0;
+}
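A typical usage sketch from the master lcore pairs rte_eal_remote_launch() with rte_eal_wait_lcore(); RTE_LCORE_FOREACH_SLAVE and rte_eal_wait_lcore() come from the generic launch/lcore headers and are assumed here, not shown in this patch.

#include <stdio.h>
#include <rte_launch.h>
#include <rte_lcore.h>

static int
lcore_hello(void *arg)
{
        (void)arg;
        printf("hello from lcore %u\n", rte_lcore_id());
        return 0;
}

/* called from the master lcore after EAL initialisation */
static void
launch_on_slaves(void)
{
        unsigned lcore_id;

        RTE_LCORE_FOREACH_SLAVE(lcore_id)
                rte_eal_remote_launch(lcore_hello, NULL, lcore_id);

        RTE_LCORE_FOREACH_SLAVE(lcore_id)
                rte_eal_wait_lcore(lcore_id);  /* blocks until FINISHED */
}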
+
+/* set affinity for current thread */
+static int
+eal_thread_set_affinity(void)
+{
+ int s;
+ pthread_t thread;
+
+/*
+ * According to the section VERSIONS of the CPU_ALLOC man page:
+ *
+ * The CPU_ZERO(), CPU_SET(), CPU_CLR(), and CPU_ISSET() macros were added
+ * in glibc 2.3.3.
+ *
+ * CPU_COUNT() first appeared in glibc 2.6.
+ *
+ * CPU_AND(), CPU_OR(), CPU_XOR(), CPU_EQUAL(), CPU_ALLOC(),
+ * CPU_ALLOC_SIZE(), CPU_FREE(), CPU_ZERO_S(), CPU_SET_S(), CPU_CLR_S(),
+ * CPU_ISSET_S(), CPU_AND_S(), CPU_OR_S(), CPU_XOR_S(), and CPU_EQUAL_S()
+ * first appeared in glibc 2.7.
+ */
+#if defined(CPU_ALLOC)
+ size_t size;
+ cpu_set_t *cpusetp;
+
+ cpusetp = CPU_ALLOC(RTE_MAX_LCORE);
+ if (cpusetp == NULL) {
+ RTE_LOG(ERR, EAL, "CPU_ALLOC failed\n");
+ return -1;
+ }
+
+ size = CPU_ALLOC_SIZE(RTE_MAX_LCORE);
+ CPU_ZERO_S(size, cpusetp);
+ CPU_SET_S(rte_lcore_id(), size, cpusetp);
+
+ thread = pthread_self();
+ s = pthread_setaffinity_np(thread, size, cpusetp);
+ if (s != 0) {
+ RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+ CPU_FREE(cpusetp);
+ return -1;
+ }
+
+ CPU_FREE(cpusetp);
+#else /* CPU_ALLOC */
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(rte_lcore_id(), &cpuset);
+
+ thread = pthread_self();
+ s = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+ if (s != 0) {
+ RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+ return -1;
+ }
+#endif
+ return 0;
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__attribute__((noreturn)) void *
+eal_thread_loop(__attribute__((unused)) void *arg)
+{
+ char c;
+ int n, ret;
+ unsigned lcore_id;
+ pthread_t thread_id;
+ int m2s, s2m;
+
+ thread_id = pthread_self();
+
+ /* retrieve our lcore_id from the configuration structure */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (thread_id == lcore_config[lcore_id].thread_id)
+ break;
+ }
+ if (lcore_id == RTE_MAX_LCORE)
+ rte_panic("cannot retrieve lcore id\n");
+
+ RTE_LOG(DEBUG, EAL, "Core %u is ready (tid=%x)\n",
+ lcore_id, (int)thread_id);
+
+ m2s = lcore_config[lcore_id].pipe_master2slave[0];
+ s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+
+ /* read on our pipe to get commands */
+ while (1) {
+ void *fct_arg;
+
+ /* wait command */
+ n = 0;
+ do {
+ n = read(m2s, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ lcore_config[lcore_id].state = RUNNING;
+
+ /* send ack */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(s2m, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ if (lcore_config[lcore_id].f == NULL)
+ rte_panic("NULL function pointer\n");
+
+ /* call the function and store the return value */
+ fct_arg = lcore_config[lcore_id].arg;
+ ret = lcore_config[lcore_id].f(fct_arg);
+ lcore_config[lcore_id].ret = ret;
+ rte_wmb();
+ lcore_config[lcore_id].state = FINISHED;
+ }
+
+ /* never reached */
+ /* pthread_exit(NULL); */
+ /* return NULL; */
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_fs_paths.h b/lib/librte_eal/linuxapp/eal/include/eal_fs_paths.h
new file mode 100644
index 0000000..9c5ffb2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/eal_fs_paths.h
@@ -0,0 +1,96 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+/**
+ * @file
+ * Paths used for storing hugepage and config info for multi-process support.
+ */
+
+#ifndef _EAL_LINUXAPP_FS_PATHS_H
+#define _EAL_LINUXAPP_FS_PATHS_H
+
+/** Path of rte config file. */
+#define RUNTIME_CONFIG_FMT "%s/.%s_config"
+
+static const char *default_config_dir = "/var/run";
+
+static inline const char *
+eal_runtime_config_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+ const char *directory = default_config_dir;
+ const char *home_dir = getenv("HOME");
+
+ if (getuid() != 0 && home_dir != NULL)
+ directory = home_dir;
+ rte_snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
+ internal_config.hugefile_prefix);
+ return buffer;
+}
+
+/** Path of hugepage info file. */
+#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
+
+static inline const char *
+eal_hugepage_info_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+ const char *directory = default_config_dir;
+ const char *home_dir = getenv("HOME");
+
+ if (getuid() != 0 && home_dir != NULL)
+ directory = home_dir;
+ rte_snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory,
+ internal_config.hugefile_prefix);
+ return buffer;
+}
+
+/** String format for hugepage map files. */
+#define HUGEFILE_FMT "%s/%smap_%d"
+
+static inline const char *
+eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+ rte_snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
+ internal_config.hugefile_prefix, f_id);
+ buffer[buflen - 1] = '\0';
+ return buffer;
+}
+
+/** define the default filename prefix for the %s values above */
+#define HUGEFILE_PREFIX_DEFAULT "rte"
+
+
+#endif /* _EAL_LINUXAPP_FS_PATHS_H */
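As an illustration of the three format strings above, a non-root user with HOME=/home/user, the default "rte" prefix and a hugetlbfs mount at /mnt/huge (all example values) ends up with /home/user/.rte_config, /home/user/.rte_hugepage_info and map files such as /mnt/huge/rtemap_0. The small sketch below just prints those expansions with plain snprintf.

#include <stdio.h>

int
main(void)
{
        char buf[256];

        /* example values only; the EAL derives these at runtime */
        snprintf(buf, sizeof(buf), "%s/.%s_config", "/home/user", "rte");
        printf("%s\n", buf);            /* /home/user/.rte_config */
        snprintf(buf, sizeof(buf), "%s/.%s_hugepage_info", "/home/user", "rte");
        printf("%s\n", buf);            /* /home/user/.rte_hugepage_info */
        snprintf(buf, sizeof(buf), "%s/%smap_%d", "/mnt/huge", "rte", 0);
        printf("%s\n", buf);            /* /mnt/huge/rtemap_0 */
        return 0;
}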
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
new file mode 100644
index 0000000..2c7d646
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
@@ -0,0 +1,62 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#ifndef RTE_LINUXAPP_HUGEPAGES_H_
+#define RTE_LINUXAPP_HUGEPAGES_H_
+
+#define MAX_HUGEPAGE_PATH PATH_MAX
+
+/**
+ * Structure used to store information about hugepages that we mapped
+ * through the files in hugetlbfs.
+ */
+struct hugepage {
+ void *orig_va; /**< virtual addr of first mmap() */
+ void *final_va; /**< virtual addr of 2nd mmap() */
+ uint64_t physaddr; /**< physical addr */
+ uint64_t size; /**< the page size */
+ int socket_id; /**< NUMA socket ID */
+ int file_id; /**< the '%d' in HUGEFILE_FMT */
+ int memseg_id; /**< the memory segment to which page belongs */
+ char filepath[MAX_HUGEPAGE_PATH]; /**< Path to backing file on filesystem */
+};
+
+/**
+ * Read the information from linux on what hugepages are available
+ * for the EAL to use
+ */
+int eal_hugepage_info_init(void);
+
+#endif /* RTE_LINUXAPP_HUGEPAGES_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h b/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h
new file mode 100644
index 0000000..70d5afb
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+/**
+ * @file
+ * Holds the structures for the eal internal configuration
+ */
+
+#ifndef _EAL_LINUXAPP_INTERNAL_CFG
+#define _EAL_LINUXAPP_INTERNAL_CFG
+
+#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */
+
+/*
+ * internal configuration structure for the number, size and
+ * mount points of hugepages
+ */
+struct hugepage_info {
+ uint64_t hugepage_sz; /**< size of a huge page */
+ const char *hugedir; /**< dir where hugetlbfs is mounted */
+ uint32_t num_pages; /**< number of hugepages of that size */
+};
+
+/**
+ * internal configuration
+ */
+struct internal_config {
+ volatile uint64_t memory; /* amount of memory requested */
+ volatile unsigned force_nchannel; /* force number of channels */
+ volatile unsigned force_nrank; /* force number of ranks */
+ volatile unsigned no_hugetlbfs; /* true to disable hugetlbfs */
+ volatile unsigned no_pci; /* true to disable PCI */
+ volatile unsigned no_hpet; /* true to disable HPET */
+ volatile unsigned no_shconf; /* true if there is no shared config */
+ volatile enum rte_proc_type_t process_type; /* multi-process proc type */
+ const char *hugefile_prefix; /* the base filename of hugetlbfs files */
+ const char *hugepage_dir; /* specific hugetlbfs directory to use */
+
+ unsigned num_hugepage_sizes; /* how many sizes on this system */
+ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+};
+extern struct internal_config internal_config; /**< Global EAL configuration. */
+
+#endif /* _EAL_LINUXAPP_INTERNAL_CFG */
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_thread.h b/lib/librte_eal/linuxapp/eal/include/eal_thread.h
new file mode 100644
index 0000000..a04a35e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/eal_thread.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#ifndef _EAL_LINUXAPP_THREAD_H_
+#define _EAL_LINUXAPP_THREAD_H_
+
+/**
+ * Basic loop of a thread, called for each slave thread at EAL init time.
+ *
+ * @param arg
+ * opaque pointer
+ */
+__attribute__((noreturn)) void *eal_thread_loop(void *arg);
+
+/**
+ * Init per-lcore info for master thread
+ *
+ * @param lcore_id
+ * identifier of master lcore
+ */
+void eal_thread_init_master(unsigned lcore_id);
+
+#endif /* _EAL_LINUXAPP_THREAD_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
new file mode 100644
index 0000000..15ca209
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -0,0 +1,56 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
+#define _RTE_LINUXAPP_INTERRUPTS_H_
+
+enum rte_intr_handle_type {
+ RTE_INTR_HANDLE_UNKNOWN = 0,
+ RTE_INTR_HANDLE_UIO, /**< uio device handle */
+ RTE_INTR_HANDLE_ALARM, /**< alarm handle */
+ RTE_INTR_HANDLE_MAX
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+ int fd; /**< file descriptor */
+ enum rte_intr_handle_type type; /**< handle type */
+};
+
+#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_lcore.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_lcore.h
new file mode 100644
index 0000000..4f14cbb
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_lcore.h
@@ -0,0 +1,92 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#ifndef _RTE_LCORE_H_
+#error "don't include this file directly, please include generic <rte_lcore.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_LCORE_H_
+#define _RTE_LINUXAPP_LCORE_H_
+
+/**
+ * @file
+ * API for lcore and socket manipulation in linuxapp environment
+ */
+
+/**
+ * structure storing internal configuration (per-lcore)
+ */
+struct lcore_config {
+ unsigned detected; /**< true if lcore was detected */
+ pthread_t thread_id; /**< pthread identifier */
+ int pipe_master2slave[2]; /**< communication pipe with master */
+ int pipe_slave2master[2]; /**< communication pipe with master */
+ lcore_function_t * volatile f; /**< function to call */
+ void * volatile arg; /**< argument of function */
+ volatile int ret; /**< return value of function */
+ volatile enum rte_lcore_state_t state; /**< lcore state */
+ unsigned socket_id; /**< physical socket id for this lcore */
+};
+
+/**
+ * internal configuration (per-lcore)
+ */
+extern struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/**
+ * Return the ID of the physical socket of the logical core we are
+ * running on.
+ */
+static inline unsigned
+rte_socket_id(void)
+{
+ return lcore_config[rte_lcore_id()].socket_id;
+}
+
+/**
+ * Get the ID of the physical socket of the specified lcore
+ *
+ * @param lcore_id
+ * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1.
+ * @return
+ * the ID of lcoreid's physical socket
+ */
+static inline unsigned
+rte_lcore_to_socket_id(unsigned lcore_id)
+{
+ return lcore_config[lcore_id].socket_id;
+}
+
+#endif /* _RTE_LINUXAPP_LCORE_H_ */
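A short usage sketch of these two helpers; rte_lcore_id() and RTE_LCORE_FOREACH come from the generic rte_lcore.h and are assumed here for illustration.

#include <stdio.h>
#include <rte_lcore.h>

/* sketch: print the physical socket behind every enabled lcore,
 * assuming the EAL has already been initialised */
static void
dump_lcore_sockets(void)
{
        unsigned lcore_id;

        printf("running on lcore %u, socket %u\n",
               rte_lcore_id(), rte_socket_id());

        RTE_LCORE_FOREACH(lcore_id)
                printf("lcore %u -> socket %u\n",
                       lcore_id, rte_lcore_to_socket_id(lcore_id));
}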
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_per_lcore.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_per_lcore.h
new file mode 100644
index 0000000..781cfed
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_per_lcore.h
@@ -0,0 +1,69 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * version: DPDK.L.1.2.3-3
+ */
+
+#ifndef _RTE_PER_LCORE_H_
+#error "don't include this file directly, please include generic <rte_per_lcore.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_PER_LCORE_H_
+#define _RTE_LINUXAPP_PER_LCORE_H_
+
+/**
+ * @file
+ * Per-lcore variables in RTE on linuxapp environment
+ */
+
+#include <pthread.h>
+
+/**
+ * Macro to define a per-lcore variable "name" of type "type". Do not
+ * use keywords like "static" or "volatile" in "type"; prefix the
+ * whole macro instead.
+ */
+#define RTE_DEFINE_PER_LCORE(type, name) \
+ __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Macro to declare an extern per-lcore variable "name" of type "type".
+ */
+#define RTE_DECLARE_PER_LCORE(type, name) \
+ extern __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Read/write the per-lcore variable value
+ */
+#define RTE_PER_LCORE(name) (per_lcore_##name)
+
+#endif /* _RTE_LINUXAPP_PER_LCORE_H_ */
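A usage sketch of the three macros; the variable name my_counter is made up for the example, and the definition is prefixed with "static" as the comment above suggests.

#include <rte_per_lcore.h>

/* one instance of the counter per lcore (i.e. per thread) */
static RTE_DEFINE_PER_LCORE(unsigned, my_counter);

/* in a header, other files would use:
 *     RTE_DECLARE_PER_LCORE(unsigned, my_counter);
 */

static int
count_something(void *arg)
{
        (void)arg;
        RTE_PER_LCORE(my_counter)++;    /* touches only this lcore's copy */
        return 0;
}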
diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile
new file mode 100644
index 0000000..f52aa7f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/Makefile
@@ -0,0 +1,55 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# version: DPDK.L.1.2.3-3
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = igb_uio
+MODULE_PATH = drivers/net/igb_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := igb_uio.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
new file mode 100644
index 0000000..51733f6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -0,0 +1,402 @@
+/*-
+ *
+ * Copyright (c) 2010-2012, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * GNU GPL V2: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/version.h>
+
+/* Some function names changed between kernels 3.2.0 and 3.3.0. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
+#define PCI_LOCK pci_block_user_cfg_access
+#define PCI_UNLOCK pci_unblock_user_cfg_access
+#else
+#define PCI_LOCK pci_cfg_access_lock
+#define PCI_UNLOCK pci_cfg_access_unlock
+#endif
+
+/**
+ * MSI-X related macros, copied from linux/pci_regs.h in kernel 2.6.39;
+ * none of them are present in kernel 2.6.35.
+ */
+#ifndef PCI_MSIX_ENTRY_SIZE
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_LOWER_ADDR 0
+#define PCI_MSIX_ENTRY_UPPER_ADDR 4
+#define PCI_MSIX_ENTRY_DATA 8
+#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
+#endif
+
+#define IGBUIO_NUM_MSI_VECTORS 1
+
+/* interrupt mode */
+enum igbuio_intr_mode {
+ IGBUIO_LEGACY_INTR_MODE = 0,
+ IGBUIO_MSI_INTR_MODE,
+ IGBUIO_MSIX_INTR_MODE,
+ IGBUIO_INTR_MODE_MAX
+};
+
+/**
+ * A structure describing the private information for a uio device.
+ */
+struct rte_uio_pci_dev {
+ struct uio_info info;
+ struct pci_dev *pdev;
+ spinlock_t lock; /* serializes access to PCI config space and MSI-X data between tasks and the ISR */
+ enum igbuio_intr_mode mode;
+ struct msix_entry msix_entries[IGBUIO_NUM_MSI_VECTORS]; /* MSI-X entries, filled in at probe time */
+};
+
+static const enum igbuio_intr_mode igbuio_intr_mode_preferred = IGBUIO_MSIX_INTR_MODE;
+
+/* PCI device id table */
+static struct pci_device_id igbuio_pci_ids[] = {
+#define RTE_PCI_DEV_ID_DECL(vend, dev) {PCI_DEVICE(vend, dev)},
+#include <rte_pci_dev_ids.h>
+{ 0, },
+};
+
+static inline struct rte_uio_pci_dev *
+igbuio_get_uio_pci_dev(struct uio_info *info)
+{
+ return container_of(info, struct rte_uio_pci_dev, info);
+}
+
+/**
+ * Mask or unmask an MSI-X vector, enabling or disabling generation of
+ * MSI-X messages.
+ */
+static int
+igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
+{
+ uint32_t mask_bits = desc->masked;
+ unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+ if (state != 0)
+ mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ else
+ mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+
+ if (mask_bits != desc->masked) {
+ writel(mask_bits, desc->mask_base + offset);
+ readl(desc->mask_base);
+ desc->masked = mask_bits;
+ }
+
+ return 0;
+}
+
+/**
+ * Set or clear the device's interrupt masks, depending on the
+ * interrupt mode in use.
+ *
+ * @param udev
+ * The pointer to the private uio device structure.
+ * @param state
+ * Non-zero to unmask (enable) interrupts, zero to mask (disable) them.
+ * @return
+ * - Always zero.
+ */
+static int
+igbuio_set_interrupt_mask(struct rte_uio_pci_dev *udev, int32_t state)
+{
+ struct pci_dev *pdev = udev->pdev;
+
+ if (udev->mode == IGBUIO_MSIX_INTR_MODE) {
+ struct msi_desc *desc;
+
+ list_for_each_entry(desc, &pdev->msi_list, list) {
+ igbuio_msix_mask_irq(desc, state);
+ }
+ }
+ else if (udev->mode == IGBUIO_LEGACY_INTR_MODE) {
+ uint32_t status;
+ uint16_t old, new;
+
+ pci_read_config_dword(pdev, PCI_COMMAND, &status);
+ old = status;
+ if (state != 0)
+ new = old & (~PCI_COMMAND_INTX_DISABLE);
+ else
+ new = old | PCI_COMMAND_INTX_DISABLE;
+
+ if (old != new)
+ pci_write_config_word(pdev, PCI_COMMAND, new);
+ }
+
+ return 0;
+}
+
+/**
+ * This is the irqcontrol callback to be registered to uio_info.
+ * It can be used to enable or disable interrupts from user space processes.
+ *
+ * @param info
+ * pointer to uio_info.
+ * @param irq_state
+ * state value. 1 to enable interrupt, 0 to disable interrupt.
+ *
+ * @return
+ * - On success, 0.
+ * - On failure, a negative value.
+ */
+static int
+igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
+{
+ unsigned long flags;
+ struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
+ struct pci_dev *pdev = udev->pdev;
+
+ spin_lock_irqsave(&udev->lock, flags);
+ PCI_LOCK(pdev);
+
+ igbuio_set_interrupt_mask(udev, irq_state);
+
+ PCI_UNLOCK(pdev);
+ spin_unlock_irqrestore(&udev->lock, flags);
+
+ return 0;
+}
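From user space this callback is reached through the standard UIO file interface: writing a 4-byte integer to the /dev/uioX node invokes irqcontrol() with that value, and reading 4 bytes blocks until the next interrupt and returns the event count. A minimal sketch follows; the device node name is an example.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        int fd = open("/dev/uio0", O_RDWR);     /* example node */
        int32_t enable = 1, count;

        if (fd < 0)
                return 1;
        /* re-enable the interrupt (reaches igbuio_pci_irqcontrol) */
        if (write(fd, &enable, sizeof(enable)) != sizeof(enable))
                perror("write");
        /* block until the next interrupt fires */
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("interrupt count: %d\n", count);
        close(fd);
        return 0;
}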
+
+/**
+ * Interrupt handler that checks whether the interrupt was raised by the right device.
+ * If so, the interrupt is disabled here and re-enabled later from user space.
+ */
+static irqreturn_t
+igbuio_pci_irqhandler(int irq, struct uio_info *info)
+{
+ irqreturn_t ret = IRQ_NONE;
+ unsigned long flags;
+ struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
+ struct pci_dev *pdev = udev->pdev;
+ uint32_t cmd_status_dword;
+ uint16_t status;
+
+ spin_lock_irqsave(&udev->lock, flags);
+ /* block userspace PCI config reads/writes */
+ PCI_LOCK(pdev);
+
+ /* in legacy mode the interrupt line may be shared */
+ if (udev->mode == IGBUIO_LEGACY_INTR_MODE) {
+ pci_read_config_dword(pdev, PCI_COMMAND, &cmd_status_dword);
+ status = cmd_status_dword >> 16;
+ /* the interrupt is not ours, bail out */
+ if (!(status & PCI_STATUS_INTERRUPT))
+ goto done;
+ }
+
+ igbuio_set_interrupt_mask(udev, 0);
+ ret = IRQ_HANDLED;
+done:
+ /* unblock userspace PCI config reads/writes */
+ PCI_UNLOCK(pdev);
+ spin_unlock_irqrestore(&udev->lock, flags);
+ printk(KERN_INFO "irq 0x%x %s\n", irq, (ret == IRQ_HANDLED) ? "handled" : "not handled");
+
+ return ret;
+}
+
+/* Remap pci resources described by bar #pci_bar in uio resource n. */
+static int
+igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
+ int n, int pci_bar, const char *name)
+{
+ unsigned long addr, len;
+ void *internal_addr;
+
+ addr = pci_resource_start(dev, pci_bar);
+ len = pci_resource_len(dev, pci_bar);
+ if (addr == 0 || len == 0)
+ return -1;
+ internal_addr = ioremap(addr, len);
+ if (internal_addr == NULL)
+ return -1;
+ info->mem[n].name = name;
+ info->mem[n].addr = addr;
+ info->mem[n].internal_addr = internal_addr;
+ info->mem[n].size = len;
+ info->mem[n].memtype = UIO_MEM_PHYS;
+ return 0;
+}
+
+/* Unmap previously ioremap'd resources */
+static void
+igbuio_pci_release_iomem(struct uio_info *info)
+{
+ int i;
+ for (i = 0; i < MAX_UIO_MAPS; i++) {
+ if (info->mem[i].internal_addr)
+ iounmap(info->mem[i].internal_addr);
+ }
+}
+
+static int __devinit
+igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ struct rte_uio_pci_dev *udev;
+
+ udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
+ if (!udev)
+ return -ENOMEM;
+
+ /*
+ * enable device: ask low-level code to enable I/O and
+ * memory
+ */
+ if (pci_enable_device(dev)) {
+ printk(KERN_ERR "Cannot enable PCI device\n");
+ goto fail_free;
+ }
+
+ /* XXX should we use 64 bits ? */
+ /* set 32-bit DMA mask */
+ if (pci_set_dma_mask(dev, (uint64_t)0xffffffff)) {
+ printk(KERN_ERR "Cannot set DMA mask\n");
+ goto fail_disable;
+ }
+
+ /*
+ * reserve device's PCI memory regions for use by this
+ * module
+ */
+ if (pci_request_regions(dev, "igb_uio")) {
+ printk(KERN_ERR "Cannot request regions\n");
+ goto fail_disable;
+ }
+
+ /* enable bus mastering on the device */
+ pci_set_master(dev);
+
+ /* remap IO memory */
+ if (igbuio_pci_setup_iomem(dev, &udev->info, 0, 0, "config"))
+ goto fail_release_regions;
+
+ /* fill uio infos */
+ udev->info.name = "Intel IGB UIO";
+ udev->info.version = "0.1";
+ udev->info.handler = igbuio_pci_irqhandler;
+ udev->info.irqcontrol = igbuio_pci_irqcontrol;
+ udev->info.priv = udev;
+ udev->pdev = dev;
+ udev->mode = 0; /* default to legacy interrupt mode */
+ spin_lock_init(&udev->lock);
+
+ /* check whether MSI-X should be tried first */
+ if (igbuio_intr_mode_preferred == IGBUIO_MSIX_INTR_MODE) {
+ int vector;
+
+ for (vector = 0; vector < IGBUIO_NUM_MSI_VECTORS; vector++)
+ udev->msix_entries[vector].entry = vector;
+
+ if (pci_enable_msix(udev->pdev, udev->msix_entries, IGBUIO_NUM_MSI_VECTORS) == 0) {
+ udev->mode = IGBUIO_MSIX_INTR_MODE;
+ }
+ else {
+ pci_disable_msix(udev->pdev);
+ printk(KERN_INFO "fail to enable pci msix, or not enough msix entries\n");
+ }
+ }
+ switch (udev->mode) {
+ case IGBUIO_MSIX_INTR_MODE:
+ udev->info.irq_flags = 0;
+ udev->info.irq = udev->msix_entries[0].vector;
+ break;
+ case IGBUIO_MSI_INTR_MODE:
+ break;
+ case IGBUIO_LEGACY_INTR_MODE:
+ udev->info.irq_flags = IRQF_SHARED;
+ udev->info.irq = dev->irq;
+ break;
+ default:
+ break;
+ }
+
+ pci_set_drvdata(dev, udev);
+ igbuio_pci_irqcontrol(&udev->info, 0);
+
+ /* register uio driver */
+ if (uio_register_device(&dev->dev, &udev->info))
+ goto fail_release_iomem;
+
+ printk(KERN_INFO "uio device registered with irq %lx\n", udev->info.irq);
+
+ return 0;
+
+fail_release_iomem:
+ igbuio_pci_release_iomem(&udev->info);
+ if (udev->mode == IGBUIO_MSIX_INTR_MODE)
+ pci_disable_msix(udev->pdev);
+fail_release_regions:
+ pci_release_regions(dev);
+fail_disable:
+ pci_disable_device(dev);
+fail_free:
+ kfree(udev);
+
+ return -ENODEV;
+}
+
+static void
+igbuio_pci_remove(struct pci_dev *dev)
+{
+ struct uio_info *info = pci_get_drvdata(dev);
+
+ uio_unregister_device(info);
+ if (((struct rte_uio_pci_dev *)info->priv)->mode == IGBUIO_MSIX_INTR_MODE)
+ pci_disable_msix(dev);
+ pci_release_regions(dev);
+ pci_disable_device(dev);
+ pci_set_drvdata(dev, NULL);
+ kfree(info);
+}
+
+static struct pci_driver igbuio_pci_driver = {
+ .name = "igb_uio",
+ .id_table = igbuio_pci_ids,
+ .probe = igbuio_pci_probe,
+ .remove = igbuio_pci_remove,
+};
+
+static int __init
+igbuio_pci_init_module(void)
+{
+ return pci_register_driver(&igbuio_pci_driver);
+}
+
+static void __exit
+igbuio_pci_exit_module(void)
+{
+ pci_unregister_driver(&igbuio_pci_driver);
+}
+
+module_init(igbuio_pci_init_module);
+module_exit(igbuio_pci_exit_module);
+
+MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");