summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Richardson <bruce.richardson@intel.com>2014-02-11 10:28:51 +0000
committerDavid Marchand <david.marchand@6wind.com>2014-02-25 21:29:19 +0100
commit40b966a211ab71e96b8e155d9058f224e7b5bbf6 (patch)
treeb7583c1e78d18c3f3e94ae5294dd5a5607268973
parent013615a784c18582d32e2a702049783c68801f6c (diff)
downloaddpdk-40b966a211ab.zip
dpdk-40b966a211ab.tar.gz
dpdk-40b966a211ab.tar.xz
ivshmem: library changes for mmaping using ivshmem
These library changes provide a new Intel DPDK feature for communicating with virtual machines using QEMU's IVSHMEM mechanism. The feature works by providing a command line for QEMU to map several hugepages into a single IVSHMEM device. For the guest to know what is inside any given IVSHMEM device (and to distinguish between Intel(R) DPDK and non-Intel(R) DPDK IVSHMEM devices), a metadata file is also mapped into the IVSHMEM segment. No work needs to be done by the guest application to map IVSHMEM devices into memory; they are automatically recognized by the Intel(R) DPDK Environment Abstraction Layer (EAL). Changes in this patch: * Changes to EAL to allow mapping of all hugepages in a memseg into a single file * Changes to EAL to allow ivshmem devices to be transparently mapped in the process running on the guest. * New ivshmem library to create and manage metadata exported to guest VMs * New ivshmem compilation targets * Mempool and ring changes to allow export of structures to a VM and allow a VM to attach to those structures. * New autotests to unit test this functionality. Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
-rw-r--r--app/test/Makefile2
-rw-r--r--app/test/autotest_data.py6
-rw-r--r--app/test/commands.c4
-rw-r--r--app/test/test.c1
-rw-r--r--app/test/test.h1
-rw-r--r--app/test/test_ivshmem.c441
-rw-r--r--config/defconfig_x86_64-ivshmem-linuxapp-gcc49
-rw-r--r--config/defconfig_x86_64-ivshmem-linuxapp-icc49
-rw-r--r--lib/Makefile1
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c12
-rw-r--r--lib/librte_eal/common/include/eal_private.h22
-rw-r--r--lib/librte_eal/common/include/rte_memory.h3
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h3
-rw-r--r--lib/librte_eal/linuxapp/eal/Makefile7
-rw-r--r--lib/librte_eal/linuxapp/eal/eal.c16
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_ivshmem.c953
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memory.c492
-rw-r--r--lib/librte_eal/linuxapp/eal/include/eal_filesystem.h14
-rw-r--r--lib/librte_eal/linuxapp/eal/include/eal_hugepages.h7
-rw-r--r--lib/librte_ivshmem/Makefile48
-rw-r--r--lib/librte_ivshmem/rte_ivshmem.c884
-rw-r--r--lib/librte_ivshmem/rte_ivshmem.h163
-rw-r--r--mk/rte.app.mk6
23 files changed, 3085 insertions, 99 deletions
diff --git a/app/test/Makefile b/app/test/Makefile
index 39fa163..c065a4c 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -92,6 +92,7 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_kni.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_power.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_common.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_timer_perf.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_ivshmem.c
ifeq ($(CONFIG_RTE_APP_TEST),y)
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += test_acl.c
@@ -107,6 +108,7 @@ CFLAGS_test_kni.o += -wd1478
else
CFLAGS_test_kni.o += -Wno-deprecated-declarations
endif
+CFLAGS += -D_GNU_SOURCE
# this application needs libraries first
DEPDIRS-$(CONFIG_RTE_APP_TEST) += lib
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index bdb7e94..1161a92 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -175,6 +175,12 @@ parallel_test_group_list = [
"Report" : None,
},
{
+ "Name" : "IVSHMEM autotest",
+ "Command" : "ivshmem_autotest",
+ "Func" : default_autotest,
+ "Report" : None,
+ },
+ {
"Name" : "Memcpy autotest",
"Command" : "memcpy_autotest",
"Func" : default_autotest,
diff --git a/app/test/commands.c b/app/test/commands.c
index 118f70d..f09bc90 100644
--- a/app/test/commands.c
+++ b/app/test/commands.c
@@ -184,6 +184,8 @@ static void cmd_autotest_parsed(void *parsed_result,
ret |= test_power();
if (all || !strcmp(res->autotest, "common_autotest"))
ret |= test_common();
+ /* accumulate with |= like the other tests so that an earlier failure is not discarded */
+ if (all || !strcmp(res->autotest, "ivshmem_autotest"))
+ ret |= test_ivshmem();
#ifdef RTE_LIBRTE_PMD_RING
if (all || !strcmp(res->autotest, "ring_pmd_autotest"))
ret |= test_pmd_ring();
@@ -224,7 +226,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
"memcpy_perf_autotest#ring_perf_autotest#"
"red_autotest#meter_autotest#sched_autotest#"
"memcpy_perf_autotest#kni_autotest#"
- "pm_autotest#"
+ "pm_autotest#ivshmem_autotest#"
#ifdef RTE_LIBRTE_ACL
"acl_autotest#"
#endif
diff --git a/app/test/test.c b/app/test/test.c
index c87e0df..3a7999b 100644
--- a/app/test/test.c
+++ b/app/test/test.c
@@ -86,6 +86,7 @@ do_recursive_call(void)
{ "test_memory_flags", no_action },
{ "test_file_prefix", no_action },
{ "test_no_huge_flag", no_action },
+ { "test_ivshmem", test_ivshmem },
};
if (recursive_call == NULL)
diff --git a/app/test/test.h b/app/test/test.h
index 71d87d1..adc6212 100644
--- a/app/test/test.h
+++ b/app/test/test.h
@@ -95,6 +95,7 @@ int test_kni(void);
int test_power(void);
int test_common(void);
int test_pmd_ring(void);
+int test_ivshmem(void);
int test_pci_run;
diff --git a/app/test/test_ivshmem.c b/app/test/test_ivshmem.c
new file mode 100644
index 0000000..52f8277
--- /dev/null
+++ b/app/test/test_ivshmem.c
@@ -0,0 +1,441 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <stdio.h>
+
+#include <cmdline_parse.h>
+
+#include "test.h"
+
+#ifdef RTE_LIBRTE_IVSHMEM
+
+#include <rte_common.h>
+#include <rte_ivshmem.h>
+#include <rte_string_fns.h>
+#include "process.h"
+
+/* Metadata names used by the tests in this file. */
+#define DUPLICATE_METADATA "duplicate"
+#define METADATA_NAME "metadata"
+#define NONEXISTENT_METADATA "nonexistent"
+/* Character that encodes test id 0 when the id is passed to a child. */
+#define FIRST_TEST 'a'
+
+/* Call process_dup() with ARGV and its element count. ARGV must be a real
+ * array (not a pointer) because the count is computed with sizeof. */
+#define launch_proc(ARGV) process_dup(ARGV, \
+ sizeof(ARGV)/(sizeof(ARGV[0])), "test_ivshmem")
+
+/* If cond is false, print a failure message naming the enclosing function
+ * and make that function return -1. */
+#define ASSERT(cond,msg) do { \
+ if (!(cond)) { \
+ printf("**** TEST %s() failed: %s\n", \
+ __func__, msg); \
+ return -1; \
+ } \
+} while(0)
+
+/*
+ * Derive the file prefix of the running process from the name of the file
+ * that descriptor 3 refers to.
+ *
+ * The basename of that file is expected to have the form
+ * "<one char><prefix>_config"; the leading character and the "_config"
+ * suffix are stripped.
+ *
+ * @param prefix  buffer that receives the NUL-terminated prefix
+ * @param size    size of the prefix buffer in bytes
+ * @return        prefix on success, NULL if the link cannot be read or the
+ *                basename is too short to hold a prefix
+ */
+static char*
+get_current_prefix(char * prefix, int size)
+{
+	char path[PATH_MAX] = {0};
+	char buf[PATH_MAX] = {0};
+	const char *base;
+	size_t base_len;
+	ssize_t link_len;
+
+	/* get file for config (fd is always 3) */
+	rte_snprintf(path, sizeof(path), "/proc/self/fd/%d", 3);
+
+	/* readlink() does not NUL-terminate: keep one byte in reserve and
+	 * terminate explicitly */
+	link_len = readlink(path, buf, sizeof(buf) - 1);
+	if (link_len == -1)
+		return NULL;
+	buf[link_len] = '\0';
+
+	/* point at the basename instead of copying buf onto itself, which
+	 * would be an overlapping copy */
+	base = basename(buf);
+	base_len = strnlen(base, sizeof(buf));
+
+	/* the subtraction below must not wrap around */
+	if (base_len < sizeof("_config"))
+		return NULL;
+
+	/* copy from the second char up to the start of "_config"; the
+	 * precision argument of "%.*s" has to be an int */
+	rte_snprintf(prefix, size, "%.*s",
+			(int)(base_len - sizeof("_config")), &base[1]);
+
+	return prefix;
+}
+
+/*
+ * Map the metadata file of the given name into this process.
+ *
+ * @param name  metadata name; the file opened is
+ *              /var/run/.dpdk_ivshmem_metadata_<name>
+ * @return      pointer to a shared read/write mapping of
+ *              sizeof(struct rte_ivshmem_metadata) bytes, or NULL if the
+ *              file cannot be opened or mapped
+ */
+static struct rte_ivshmem_metadata*
+mmap_metadata(const char *name)
+{
+	int fd;
+	char pathname[PATH_MAX];
+	void *map;
+
+	rte_snprintf(pathname, sizeof(pathname),
+			"/var/run/.dpdk_ivshmem_metadata_%s", name);
+
+	/* no O_CREAT, so no mode argument is needed */
+	fd = open(pathname, O_RDWR);
+	if (fd < 0)
+		return NULL;
+
+	map = mmap(NULL, sizeof(struct rte_ivshmem_metadata),
+			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	/* a mapping outlives its descriptor, so close the descriptor on the
+	 * success path and on the failure path alike */
+	close(fd);
+
+	if (map == MAP_FAILED)
+		return NULL;
+
+	return map;
+}
+
+/*
+ * Create the DUPLICATE_METADATA metadata.
+ * Returns 0 on success, -1 (via ASSERT) if creation fails.
+ */
+static int
+create_duplicate(void)
+{
+	/* another process will later try to create this metadata again */
+	const int rc = rte_ivshmem_metadata_create(DUPLICATE_METADATA);
+
+	ASSERT (rc == 0, "Creating metadata failed");
+	return 0;
+}
+
+/*
+ * Add RTE_LIBRTE_IVSHMEM_MAX_ENTRIES memzones to METADATA_NAME and check
+ * that adding one more is rejected.
+ * Returns 0 on success, -1 (via ASSERT) on failure.
+ */
+static int
+test_ivshmem_create_lots_of_memzones(void)
+{
+	char zone_name[IVSHMEM_NAME_LEN];
+	const struct rte_memzone *zone;
+	int n = 0;
+
+	ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+			"Failed to create metadata");
+
+	while (n < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES) {
+		rte_snprintf(zone_name, sizeof(zone_name), "mz_%i", n);
+
+		zone = rte_memzone_reserve(zone_name, CACHE_LINE_SIZE,
+				SOCKET_ID_ANY, 0);
+		ASSERT(zone != NULL, "Failed to reserve memzone");
+
+		ASSERT(rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME) == 0,
+				"Failed to add memzone");
+		n++;
+	}
+
+	/* the metadata is expected to be full by now */
+	zone = rte_memzone_reserve("one too many", CACHE_LINE_SIZE,
+			SOCKET_ID_ANY, 0);
+	ASSERT(zone != NULL, "Failed to reserve memzone");
+
+	ASSERT(rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME) < 0,
+			"Metadata should have been full");
+
+	return 0;
+}
+
+/*
+ * Check that the same memzone cannot be added to METADATA_NAME twice.
+ * Returns 0 on success, -1 (via ASSERT) on failure.
+ */
+static int
+test_ivshmem_create_duplicate_memzone(void)
+{
+	const struct rte_memzone *zone;
+	int rc;
+
+	rc = rte_ivshmem_metadata_create(METADATA_NAME);
+	ASSERT(rc == 0, "Failed to create metadata");
+
+	zone = rte_memzone_reserve("mz", CACHE_LINE_SIZE, SOCKET_ID_ANY, 0);
+	ASSERT(zone != NULL, "Failed to reserve memzone");
+
+	/* first insertion has to be accepted ... */
+	rc = rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME);
+	ASSERT(rc == 0, "Failed to add memzone");
+
+	/* ... and the second one refused */
+	rc = rte_ivshmem_metadata_add_memzone(zone, METADATA_NAME);
+	ASSERT(rc < 0, "Added the same memzone twice");
+
+	return 0;
+}
+
+/*
+ * Exercise the rte_ivshmem metadata API.
+ *
+ * Each create/add/generate call is first tried with NULL or nonexistent
+ * arguments and has to fail; afterwards a memzone, a ring and a mempool are
+ * added to METADATA_NAME and a command line is generated for it.
+ *
+ * Returns 0 on success, -1 (via ASSERT) on the first failed check.
+ */
+static int
+test_ivshmem_api_test(void)
+{
+ const struct rte_memzone * mz;
+ struct rte_mempool * mp;
+ struct rte_ring * r;
+ /* receives the generated command line */
+ char buf[BUFSIZ];
+
+ /* start empty so that the failing generate calls below can be checked
+ * for leaving the buffer untouched */
+ memset(buf, 0, sizeof(buf));
+
+ r = rte_ring_create("ring", 1, SOCKET_ID_ANY, 0);
+ mp = rte_mempool_create("mempool", 1, 1, 1, 1, NULL, NULL, NULL, NULL,
+ SOCKET_ID_ANY, 0);
+ mz = rte_memzone_reserve("memzone", 64, SOCKET_ID_ANY, 0);
+
+ ASSERT(r != NULL, "Failed to create ring");
+ ASSERT(mp != NULL, "Failed to create mempool");
+ ASSERT(mz != NULL, "Failed to reserve memzone");
+
+ /* try to create NULL metadata */
+ ASSERT(rte_ivshmem_metadata_create(NULL) < 0,
+ "Created metadata with NULL name");
+
+ /* create valid metadata to do tests on */
+ ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+ "Failed to create metadata");
+
+ /* test adding memzone */
+ ASSERT(rte_ivshmem_metadata_add_memzone(NULL, NULL) < 0,
+ "Added NULL memzone to NULL metadata");
+ ASSERT(rte_ivshmem_metadata_add_memzone(NULL, METADATA_NAME) < 0,
+ "Added NULL memzone");
+ ASSERT(rte_ivshmem_metadata_add_memzone(mz, NULL) < 0,
+ "Added memzone to NULL metadata");
+ ASSERT(rte_ivshmem_metadata_add_memzone(mz, NONEXISTENT_METADATA) < 0,
+ "Added memzone to nonexistent metadata");
+
+ /* test adding ring */
+ ASSERT(rte_ivshmem_metadata_add_ring(NULL, NULL) < 0,
+ "Added NULL ring to NULL metadata");
+ ASSERT(rte_ivshmem_metadata_add_ring(NULL, METADATA_NAME) < 0,
+ "Added NULL ring");
+ ASSERT(rte_ivshmem_metadata_add_ring(r, NULL) < 0,
+ "Added ring to NULL metadata");
+ ASSERT(rte_ivshmem_metadata_add_ring(r, NONEXISTENT_METADATA) < 0,
+ "Added ring to nonexistent metadata");
+
+ /* test adding mempool */
+ ASSERT(rte_ivshmem_metadata_add_mempool(NULL, NULL) < 0,
+ "Added NULL mempool to NULL metadata");
+ ASSERT(rte_ivshmem_metadata_add_mempool(NULL, METADATA_NAME) < 0,
+ "Added NULL mempool");
+ ASSERT(rte_ivshmem_metadata_add_mempool(mp, NULL) < 0,
+ "Added mempool to NULL metadata");
+ ASSERT(rte_ivshmem_metadata_add_mempool(mp, NONEXISTENT_METADATA) < 0,
+ "Added mempool to nonexistent metadata");
+
+ /* test creating command line */
+ /* every rejected call must also leave buf empty */
+ ASSERT(rte_ivshmem_metadata_cmdline_generate(NULL, sizeof(buf), METADATA_NAME) < 0,
+ "Written command line into NULL buffer");
+ ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+ ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, 0, METADATA_NAME) < 0,
+ "Written command line into small buffer");
+ ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+ ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf), NULL) < 0,
+ "Written command line for NULL metadata");
+ ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+ ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
+ NONEXISTENT_METADATA) < 0,
+ "Writen command line for nonexistent metadata");
+ ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+ /* add stuff to config */
+ ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0,
+ "Failed to add memzone to valid config");
+ ASSERT(rte_ivshmem_metadata_add_ring(r, METADATA_NAME) == 0,
+ "Failed to add ring to valid config");
+ ASSERT(rte_ivshmem_metadata_add_mempool(mp, METADATA_NAME) == 0,
+ "Failed to add mempool to valid config");
+
+ /* create config */
+ ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
+ METADATA_NAME) == 0, "Failed to write command-line");
+
+ /* check if something was written */
+ ASSERT(strnlen(buf, sizeof(buf)) != 0, "Buffer is empty");
+
+ /* make sure we don't segfault */
+ rte_ivshmem_metadata_dump(NULL);
+
+ /* dump our metadata */
+ rte_ivshmem_metadata_dump(METADATA_NAME);
+
+ return 0;
+}
+
+/*
+ * Check that creating DUPLICATE_METADATA is refused.
+ * Returns 0 on success, -1 (via ASSERT) if the creation succeeds.
+ */
+static int
+test_ivshmem_create_duplicate_metadata(void)
+{
+	const int rc = rte_ivshmem_metadata_create(DUPLICATE_METADATA);
+
+	ASSERT(rc < 0, "Creating duplicate metadata should have failed");
+
+	return 0;
+}
+
+/*
+ * Create METADATA_NAME, map its file and check the initial contents:
+ * magic number, name and a zeroed first entry.
+ * Returns 0 on success, -1 (via ASSERT) on the first failed check.
+ */
+static int
+test_ivshmem_create_metadata_config(void)
+{
+	struct rte_ivshmem_metadata *metadata;
+
+	ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+			"Failed to create metadata");
+
+	metadata = mmap_metadata(METADATA_NAME);
+
+	/* mmap_metadata() reports failure with NULL, not with MAP_FAILED */
+	ASSERT(metadata != NULL, "Metadata mmaping failed");
+
+	ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
+			"Magic number is not that magic");
+
+	ASSERT(strncmp(metadata->name, METADATA_NAME, sizeof(metadata->name)) == 0,
+			"Name has not been set up");
+
+	ASSERT(metadata->entry[0].offset == 0, "Offset is not initialized");
+	ASSERT(metadata->entry[0].mz.addr == 0, "mz.addr is not initialized");
+	ASSERT(metadata->entry[0].mz.len == 0, "mz.len is not initialized");
+
+	return 0;
+}
+
+/*
+ * Create RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES / 2 metadata files named
+ * "test_<n>" and check the magic number and name of each one.
+ * Returns 0 on success, -1 (via ASSERT) on the first failed check.
+ */
+static int
+test_ivshmem_create_multiple_metadata_configs(void)
+{
+	int i;
+	char name[IVSHMEM_NAME_LEN];
+	struct rte_ivshmem_metadata *metadata;
+
+	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES / 2; i++) {
+		rte_snprintf(name, sizeof(name), "test_%d", i);
+		ASSERT(rte_ivshmem_metadata_create(name) == 0,
+				"Failed to create metadata");
+
+		/* NULL means the file could not be opened or mapped; it
+		 * must not be dereferenced */
+		metadata = mmap_metadata(name);
+		ASSERT(metadata != NULL, "Metadata mmaping failed");
+
+		ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
+				"Magic number is not that magic");
+
+		ASSERT(strncmp(metadata->name, name, sizeof(metadata->name)) == 0,
+				"Name has not been set up");
+	}
+
+	return 0;
+}
+
+/*
+ * Create RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES metadata files and check that
+ * creating one more is refused.
+ * Returns 0 on success, -1 (via ASSERT) on the first failed check.
+ */
+static int
+test_ivshmem_create_too_many_metadata_configs(void)
+{
+	int i;
+	char name[IVSHMEM_NAME_LEN];
+
+	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES; i++) {
+		rte_snprintf(name, sizeof(name), "test_%d", i);
+		ASSERT(rte_ivshmem_metadata_create(name) == 0,
+				"Create config file failed");
+	}
+
+	/* use a name that has not been created above, so that a failure can
+	 * only be caused by the file limit and not by a duplicate name */
+	rte_snprintf(name, sizeof(name), "test_%d", i);
+	ASSERT(rte_ivshmem_metadata_create(name) < 0,
+			"Create config file didn't fail");
+
+	return 0;
+}
+
+/* Identifiers of the individual tests. The primary process passes each one
+ * to a secondary process as the character FIRST_TEST + id, and the secondary
+ * process dispatches on it. */
+enum rte_ivshmem_tests {
+ _test_ivshmem_api_test = 0,
+ _test_ivshmem_create_metadata_config,
+ _test_ivshmem_create_multiple_metadata_configs,
+ _test_ivshmem_create_too_many_metadata_configs,
+ _test_ivshmem_create_duplicate_metadata,
+ _test_ivshmem_create_lots_of_memzones,
+ _test_ivshmem_create_duplicate_memzone,
+ _last_test, /* number of tests; used as the loop bound, not a test */
+};
+
+/* Name of the environment variable that carries the test id to the
+ * secondary process. */
+#define RTE_IVSHMEM_TEST_ID "RTE_IVSHMEM_TEST_ID"
+
+/*
+ * Run every test of enum rte_ivshmem_tests in its own secondary process.
+ *
+ * The test to run is handed to the child through the RTE_IVSHMEM_TEST_ID
+ * environment variable as a one-character string (FIRST_TEST + test id).
+ *
+ * @return 0 if every child was launched and returned 0, -1 as soon as the
+ *         prefix cannot be determined, the environment cannot be set or a
+ *         child fails
+ */
+static int
+launch_all_tests_on_secondary_processes(void)
+{
+	int id;
+	char testid[2] = {0};
+	char tmp[PATH_MAX] = {0};
+	char prefix[PATH_MAX] = {0};
+
+	ASSERT(get_current_prefix(tmp, sizeof(tmp)) != NULL,
+			"Failed to get current file prefix");
+
+	rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
+
+	/* has to stay an array: launch_proc() takes sizeof of it */
+	const char *argv[] = { prgname, "-c", "1", "-n", "3",
+			"--proc-type=secondary", prefix };
+
+	for (id = 0; id < _last_test; id++) {
+		/* setenv() expects a NUL-terminated string, so the id lives in
+		 * a two-byte buffer instead of a lone char */
+		testid[0] = (char)(FIRST_TEST + id);
+		ASSERT(setenv(RTE_IVSHMEM_TEST_ID, testid, 1) == 0,
+				"Failed to export test id");
+		if (launch_proc(argv) != 0)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Entry point of the IVSHMEM autotest.
+ *
+ * In the primary process: create DUPLICATE_METADATA and launch every test in
+ * a secondary process. In a secondary process: run the single test selected
+ * by the RTE_IVSHMEM_TEST_ID environment variable.
+ *
+ * @return 0 on success, -1 on failure, on a missing test id or on an
+ *         unknown test id
+ */
+int
+test_ivshmem(void)
+{
+	const char *env;
+	int testid;
+
+	/* We want to have a clean execution for every test without exposing
+	 * private global data structures in rte_ivshmem so we launch each test
+	 * on a different secondary process. */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+
+		/* first, create metadata */
+		ASSERT(create_duplicate() == 0, "Creating metadata failed");
+
+		return launch_all_tests_on_secondary_processes();
+	}
+
+	/* getenv() returns NULL when the variable is not set, e.g. when the
+	 * secondary process was not started by the primary above */
+	env = getenv(RTE_IVSHMEM_TEST_ID);
+	ASSERT(env != NULL, "Test ID is not set in the environment");
+
+	testid = *env - FIRST_TEST;
+
+	printf("Secondary process running test %d \n", testid);
+
+	switch (testid) {
+	case _test_ivshmem_api_test:
+		return test_ivshmem_api_test();
+
+	case _test_ivshmem_create_metadata_config:
+		return test_ivshmem_create_metadata_config();
+
+	case _test_ivshmem_create_multiple_metadata_configs:
+		return test_ivshmem_create_multiple_metadata_configs();
+
+	case _test_ivshmem_create_too_many_metadata_configs:
+		return test_ivshmem_create_too_many_metadata_configs();
+
+	case _test_ivshmem_create_duplicate_metadata:
+		return test_ivshmem_create_duplicate_metadata();
+
+	case _test_ivshmem_create_lots_of_memzones:
+		return test_ivshmem_create_lots_of_memzones();
+
+	case _test_ivshmem_create_duplicate_memzone:
+		return test_ivshmem_create_duplicate_memzone();
+
+	default:
+		break;
+	}
+
+	/* unknown test id */
+	return -1;
+}
+#else /* RTE_LIBRTE_IVSHMEM */
+
+/*
+ * Stand-in used when RTE_LIBRTE_IVSHMEM is not defined: prints a notice and
+ * reports success.
+ */
+int
+test_ivshmem(void)
+{
+	fputs("This binary was not compiled with IVSHMEM support!\n", stdout);
+	return 0;
+}
+#endif /* RTE_LIBRTE_IVSHMEM */
diff --git a/config/defconfig_x86_64-ivshmem-linuxapp-gcc b/config/defconfig_x86_64-ivshmem-linuxapp-gcc
new file mode 100644
index 0000000..2f55a69
--- /dev/null
+++ b/config/defconfig_x86_64-ivshmem-linuxapp-gcc
@@ -0,0 +1,49 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# use default config
+#
+
+#include "defconfig_x86_64-default-linuxapp-gcc"
+
+#
+# Compile IVSHMEM library
+#
+CONFIG_RTE_LIBRTE_IVSHMEM=y
+CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32
+
+# Set EAL to single file segments
+CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y \ No newline at end of file
diff --git a/config/defconfig_x86_64-ivshmem-linuxapp-icc b/config/defconfig_x86_64-ivshmem-linuxapp-icc
new file mode 100644
index 0000000..14f0926
--- /dev/null
+++ b/config/defconfig_x86_64-ivshmem-linuxapp-icc
@@ -0,0 +1,49 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# use default config
+#
+
+#include "defconfig_x86_64-default-linuxapp-icc"
+
+#
+# Compile IVSHMEM library
+#
+CONFIG_RTE_LIBRTE_IVSHMEM=y
+CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32
+
+# Set EAL to single file segments
+CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y
diff --git a/lib/Makefile b/lib/Makefile
index fda306e..c9f0111 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
+DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
endif
include $(RTE_SDK)/mk/rte.sharelib.mk
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 4d60f8c..a1fcdfd 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -479,11 +479,17 @@ rte_eal_memzone_init(void)
rte_rwlock_write_lock(&mcfg->mlock);
- /* duplicate the memsegs from config */
- memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG);
+ /* fill in uninitialized free_memsegs */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (memseg[i].addr == NULL)
+ break;
+ if (free_memseg[i].addr != NULL)
+ continue;
+ memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
+ }
/* make all zones cache-aligned */
- for (i=0; i<RTE_MAX_MEMSEG; i++) {
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
if (free_memseg[i].addr == NULL)
break;
if (memseg_sanitize(&free_memseg[i]) < 0) {
diff --git a/lib/librte_eal/common/include/eal_private.h b/lib/librte_eal/common/include/eal_private.h
index 7e2a269..251f15e 100644
--- a/lib/librte_eal/common/include/eal_private.h
+++ b/lib/librte_eal/common/include/eal_private.h
@@ -128,6 +128,28 @@ int rte_eal_log_init(const char *id, int facility);
*/
int rte_eal_pci_init(void);
+#ifdef RTE_LIBRTE_IVSHMEM
+/**
+ * Init the memory from IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_ivshmem_init(void);
+
+/**
+ * Init objects in IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_ivshmem_obj_init(void);
+#endif
+
struct rte_pci_driver;
struct rte_pci_device;
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index d2c6265..4611dcd 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -79,6 +79,9 @@ struct rte_memseg {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64 bits */
};
+#ifdef RTE_LIBRTE_IVSHMEM
+ phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
+#endif
size_t len; /**< Length of the segment. */
size_t hugepage_sz; /**< The pagesize of underlying memory */
int32_t socket_id; /**< NUMA socket ID. */
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index a4243e4..5e29ff1 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -75,6 +75,9 @@ struct rte_memzone {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64-bits */
};
+#ifdef RTE_LIBRTE_IVSHMEM
+ phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
+#endif
size_t len; /**< Length of the memzone. */
size_t hugepage_sz; /**< The page size of underlying memory */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 91f96bc..2667145 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -41,6 +41,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_ring
CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
CFLAGS += -I$(RTE_SDK)/lib/librte_ether
+CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_ring
CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_pcap
CFLAGS += $(WERROR_FLAGS) -O3
@@ -57,6 +58,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
+endif
# from common dir
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
@@ -75,6 +79,9 @@ CFLAGS_eal.o := -D_GNU_SOURCE
CFLAGS_eal_thread.o := -D_GNU_SOURCE
CFLAGS_eal_log.o := -D_GNU_SOURCE
CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
+CFLAGS_eal_pci.o := -D_GNU_SOURCE
+CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
# workaround for a gcc bug with noreturn attribute
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 7a32794..1ddfb65 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -935,6 +935,14 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_cpu_init() < 0)
rte_panic("Cannot detect lcores\n");
+ if (rte_eal_pci_init() < 0)
+ rte_panic("Cannot init PCI\n");
+
+#ifdef RTE_LIBRTE_IVSHMEM
+ if (rte_eal_ivshmem_init() < 0)
+ rte_panic("Cannot init IVSHMEM\n");
+#endif
+
if (rte_eal_memory_init() < 0)
rte_panic("Cannot init memory\n");
@@ -947,6 +955,11 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_tailqs_init() < 0)
rte_panic("Cannot init tail queues for objects\n");
+#ifdef RTE_LIBRTE_IVSHMEM
+ if (rte_eal_ivshmem_obj_init() < 0)
+ rte_panic("Cannot init IVSHMEM objects\n");
+#endif
+
if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
rte_panic("Cannot init logs\n");
@@ -959,9 +972,6 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_timer_init() < 0)
rte_panic("Cannot init HPET or TSC timers\n");
- if (rte_eal_pci_init() < 0)
- rte_panic("Cannot init PCI\n");
-
RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
rte_config.master_lcore, (int)thread_id);
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
new file mode 100644
index 0000000..6191fef
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
@@ -0,0 +1,953 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_common.h>
+#include <rte_ivshmem.h>
+#include <rte_tailq_elem.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+
+/* PCI vendor/device ID pair that is_ivshmem_device() matches against */
+#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
+#define PCI_DEVICE_ID_IVSHMEM 0x1110
+
+/* value has_ivshmem_metadata() expects in the metadata magic_number field */
+#define IVSHMEM_MAGIC 0x0BADC0DE
+#define IVSHMEM_METADATA_SIZE 0x1000
+
+/* sysfs path template for BAR2 of a PCI device; arguments are
+ * domain, bus, devid and function */
+#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
+/* path template of the shared config file; %s is the hugefile prefix */
+#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
+
+/* bit flags returned by overlap() and adjacent(), one per address space;
+ * FULL means the relation holds in all three address spaces */
+#define PHYS 0x1
+#define VIRT 0x2
+#define IOREMAP 0x4
+#define FULL (PHYS|VIRT|IOREMAP)
+
+/* size of the metadata structure aligned up to 'pagesz'; expands to a
+ * reference to the file-scope 'pagesz', which must be set before use */
+#define METADATA_SIZE_ALIGNED \
+ (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
+/* true when the start address of y lies in [x.addr_64, x.addr_64 + x.len);
+ * only the start of y is tested, not its end */
+#define CONTAINS(x,y)\
+ (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
+
+/* number of elements of array x (must be a real array, not a pointer) */
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+/* one discovered DPDK IVSHMEM PCI device */
+struct ivshmem_pci_device {
+ char path[PATH_MAX]; /* resource path built from IVSHMEM_RESOURCE_PATH */
+ phys_addr_t ioremap_addr; /* filled from the BAR2 phys_addr of the device */
+};
+
+/* data type to store in config */
+struct ivshmem_segment {
+ struct rte_ivshmem_metadata_entry entry; /* copy of one metadata entry */
+ /* distance of the memzone address from the preceding 0x1000 boundary,
+ * saved by map_all_segments() */
+ uint64_t align;
+ char path[PATH_MAX]; /* path of the device file the entry was read from */
+};
+/* contents of the file mapped by create_shared_config() and
+ * open_shared_config() */
+struct ivshmem_shared_config {
+ struct ivshmem_segment segment[RTE_MAX_MEMSEG];
+ uint32_t segment_idx; /* number of used entries in segment[] */
+ struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
+ uint32_t pci_devs_idx; /* number of used entries in pci_devs[] */
+};
+/* NULL until a config has been created or mapped */
+static struct ivshmem_shared_config * ivshmem_config;
+/* number of segments left after cleanup_segments(), set in map_all_segments() */
+static int memseg_idx;
+/* result of getpagesize(), set in rte_eal_ivshmem_init() */
+static int pagesz;
+
+/* Tailq heads to add rings to */
+TAILQ_HEAD(rte_ring_list, rte_ring);
+
+/*
+ * Utility functions
+ */
+
+/* Return 1 when both the vendor and the device ID of dev are the IVSHMEM
+ * ones, 0 otherwise. */
+static int
+is_ivshmem_device(struct rte_pci_device * dev)
+{
+	if (dev->id.vendor_id != PCI_VENDOR_ID_IVSHMEM)
+		return 0;
+
+	return dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
+}
+
+/* Map the metadata structure of a device file of size len. The mapping
+ * starts METADATA_SIZE_ALIGNED bytes before the end of the file and is
+ * sizeof(struct rte_ivshmem_metadata) bytes long.
+ * Returns the mmap() result, i.e. MAP_FAILED on error. */
+static void *
+map_metadata(int fd, uint64_t len)
+{
+	const size_t map_len = sizeof(struct rte_ivshmem_metadata);
+	const size_t tail_len = METADATA_SIZE_ALIGNED;
+	const int prot = PROT_READ | PROT_WRITE;
+
+	return mmap(NULL, map_len, prot, MAP_SHARED, fd, len - tail_len);
+}
+
+/* Release a mapping obtained from map_metadata(). */
+static void
+unmap_metadata(void * ptr)
+{
+	const size_t map_len = sizeof(struct rte_ivshmem_metadata);
+
+	munmap(ptr, map_len);
+}
+
+/* Check whether the device file behind fd (of size len) carries DPDK
+ * metadata. Returns 1 if the magic number matches, 0 if it does not and
+ * -1 if the metadata could not be mapped. */
+static int
+has_ivshmem_metadata(int fd, uint64_t len)
+{
+	const struct rte_ivshmem_metadata * mapped;
+	void * ptr;
+	int found;
+
+	ptr = map_metadata(fd, len);
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	/* only the magic number is needed, so test it in place */
+	mapped = (const struct rte_ivshmem_metadata*) ptr;
+	found = (mapped->magic_number == IVSHMEM_MAGIC);
+
+	unmap_metadata(ptr);
+
+	return found;
+}
+
+/* Remove entry idx from the table ms of len entries: every later entry is
+ * shifted down by one and the last entry is zeroed. */
+static void
+remove_segment(struct ivshmem_segment * ms, int len, int idx)
+{
+	const int tail = len - 1 - idx;
+
+	/* regions overlap, hence memmove */
+	if (tail > 0)
+		memmove(&ms[idx], &ms[idx + 1],
+				(size_t) tail * sizeof(struct ivshmem_segment));
+
+	memset(&ms[len - 1], 0, sizeof(struct ivshmem_segment));
+}
+
+/* non-zero when the start of either range lies inside the other half-open
+ * range [start, end) */
+static int
+ranges_intersect(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
+{
+	return (s1 >= s2 && s1 < e2) || (s2 >= s1 && s2 < e1);
+}
+
+/* Compare two memzones in the virtual, physical and ioremap address spaces.
+ * Returns a bitmask of VIRT, PHYS and IOREMAP, one bit for each address
+ * space in which the zones overlap (0 if they overlap in none). */
+static int
+overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+	int flags = 0;
+
+	/* virtual addresses */
+	if (ranges_intersect(mz1->addr_64, mz1->addr_64 + mz1->len,
+			mz2->addr_64, mz2->addr_64 + mz2->len))
+		flags |= VIRT;
+
+	/* physical addresses */
+	if (ranges_intersect(mz1->phys_addr, mz1->phys_addr + mz1->len,
+			mz2->phys_addr, mz2->phys_addr + mz2->len))
+		flags |= PHYS;
+
+	/* ioremap addresses */
+	if (ranges_intersect(mz1->ioremap_addr, mz1->ioremap_addr + mz1->len,
+			mz2->ioremap_addr, mz2->ioremap_addr + mz2->len))
+		flags |= IOREMAP;
+
+	return flags;
+}
+
+/* non-zero when one range starts exactly where the other one ends */
+static int
+ranges_touch(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
+{
+	return s1 == e2 || s2 == e1;
+}
+
+/* Compare two memzones in the virtual, physical and ioremap address spaces.
+ * Returns a bitmask of VIRT, PHYS and IOREMAP, one bit for each address
+ * space in which the zones are directly adjacent (0 if in none). */
+static int
+adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+	int flags = 0;
+
+	/* virtual addresses */
+	if (ranges_touch(mz1->addr_64, mz1->addr_64 + mz1->len,
+			mz2->addr_64, mz2->addr_64 + mz2->len))
+		flags |= VIRT;
+
+	/* physical addresses */
+	if (ranges_touch(mz1->phys_addr, mz1->phys_addr + mz1->len,
+			mz2->phys_addr, mz2->phys_addr + mz2->len))
+		flags |= PHYS;
+
+	/* ioremap addresses */
+	if (ranges_touch(mz1->ioremap_addr, mz1->ioremap_addr + mz1->len,
+			mz2->ioremap_addr, mz2->ioremap_addr + mz2->len))
+		flags |= IOREMAP;
+
+	return flags;
+}
+
+/* Return 1 if the table ms (len entries) contains at least one pair of
+ * segments that cleanup_segments() is able to concatenate, 0 otherwise. */
+static int
+has_adjacent_segments(struct ivshmem_segment * ms, int len)
+{
+	int i, j;
+
+	for (i = 0; i < len; i++)
+		for (j = i + 1; j < len; j++) {
+			/* we're only interested in fully adjacent segments (virtual,
+			 * physical and ioremap); partially adjacent segments can
+			 * coexist and are never concatenated by cleanup_segments().
+			 * Reporting them here would keep its loop running forever.
+			 */
+			if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL)
+				return 1;
+		}
+	return 0;
+}
+
+/* Return 1 if any two segments of the table ms (len entries) overlap in at
+ * least one address space, 0 otherwise. */
+static int
+has_overlapping_segments(struct ivshmem_segment * ms, int len)
+{
+	int first, second;
+
+	for (first = 0; first < len; first++) {
+		for (second = first + 1; second < len; second++) {
+			if (overlap(&ms[first].entry.mz, &ms[second].entry.mz) != 0)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* qsort() comparator for struct ivshmem_segment: orders segments by
+ * ascending physical address and moves unallocated ones (NULL address) to
+ * the end. Returns <0, 0 or >0 as required by qsort(). */
+static int
+seg_compare(const void * a, const void * b)
+{
+	const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
+	const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
+
+	/* move unallocated zones to the end */
+	if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
+		return 0;
+	if (s1->entry.mz.addr == NULL)
+		return 1;
+	if (s2->entry.mz.addr == NULL)
+		return -1;
+
+	/* a plain '>' would never yield a negative value and so would not be
+	 * a consistent ordering for qsort() */
+	if (s1->entry.mz.phys_addr < s2->entry.mz.phys_addr)
+		return -1;
+	if (s1->entry.mz.phys_addr > s2->entry.mz.phys_addr)
+		return 1;
+	return 0;
+}
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+/* Log the virtual, physical and ioremap ranges, the length and the offset
+ * of a metadata entry at DEBUG level. */
+static void
+entry_dump(struct rte_ivshmem_metadata_entry *e)
+{
+	const struct rte_memzone * zone = &e->mz;
+
+	RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", zone->addr,
+			RTE_PTR_ADD(zone->addr, zone->len));
+	RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+			zone->phys_addr, zone->phys_addr + zone->len);
+	RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+			zone->ioremap_addr, zone->ioremap_addr + zone->len);
+	RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", zone->len);
+	RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
+}
+#endif
+
+
+
+/*
+ * Actual useful code
+ */
+
+/* read through metadata mapped from the IVSHMEM device
+ *
+ * Every initialized entry of the metadata is appended to
+ * ivshmem_config->segment[] together with the device path.
+ *
+ * path     - path of the device file the metadata belongs to
+ * path_len - size of the buffer behind path
+ * fd       - open descriptor of the device file
+ * flen     - length of the device file
+ *
+ * Returns 0 on success, -1 if the metadata cannot be mapped or if there is
+ * an entry left to store but no free slot in the segment table.
+ */
+static int
+read_metadata(char * path, int path_len, int fd, uint64_t flen)
+{
+	struct rte_ivshmem_metadata metadata;
+	struct rte_ivshmem_metadata_entry * entry;
+	struct ivshmem_segment * seg;
+	size_t copy_len;
+	int idx, i;
+	void * ptr;
+
+	ptr = map_metadata(fd, flen);
+
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+	unmap_metadata(ptr);
+
+	RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
+
+	/* the copy is bounded by the destination buffer, and additionally by
+	 * the source buffer size if the caller passed a smaller one */
+	copy_len = sizeof(ivshmem_config->segment[0].path);
+	if (path_len >= 0 && (size_t) path_len < copy_len)
+		copy_len = (size_t) path_len;
+
+	idx = ivshmem_config->segment_idx;
+
+	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES; i++) {
+
+		entry = &metadata.entry[i];
+
+		/* stop on uninitialized memzone */
+		if (entry->mz.len == 0)
+			break;
+
+		/* running out of slots is only an error when there really is
+		 * another entry to store, so check after the terminator test */
+		if (idx == RTE_MAX_MEMSEG) {
+			RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
+			return -1;
+		}
+
+		seg = &ivshmem_config->segment[idx];
+
+		/* copy metadata entry */
+		memcpy(&seg->entry, entry,
+				sizeof(struct rte_ivshmem_metadata_entry));
+
+		/* copy path */
+		rte_snprintf(seg->path, copy_len, "%s", path);
+
+		idx++;
+	}
+	ivshmem_config->segment_idx = idx;
+
+	return 0;
+}
+
+/* check through each segment and look for adjacent or overlapping ones.
+ *
+ * The table ms (tbl_len entries) is sorted with seg_compare() and then
+ * scanned repeatedly. Each pass merges at most one pair of segments that
+ * is fully adjacent or fully overlapping (in all three address spaces)
+ * into the lower-indexed entry and removes the other one.
+ *
+ * Returns the number of entries left in the table, or -1 if two segments
+ * overlap only partially.
+ */
+static int
+cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
+{
+ struct ivshmem_segment * s, * tmp;
+ int i, j, concat, seg_adjacent, seg_overlapping;
+ uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
+
+ qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
+ seg_compare);
+
+ /* NOTE(review): termination relies on every pair reported by the two
+ * has_*_segments() helpers being either merged or rejected below */
+ while (has_overlapping_segments(ms, tbl_len) ||
+ has_adjacent_segments(ms, tbl_len)) {
+
+ for (i = 0; i < tbl_len; i++) {
+ s = &ms[i];
+
+ concat = 0;
+
+ for (j = i + 1; j < tbl_len; j++) {
+ tmp = &ms[j];
+
+ /* check if this segment is overlapping with existing segment,
+ * or is adjacent to existing segment */
+ seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
+ seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
+
+ /* check if segments fully overlap or are fully adjacent */
+ if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+ entry_dump(&s->entry);
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+ entry_dump(&tmp->entry);
+#endif
+
+ start1 = s->entry.mz.addr_64;
+ start2 = tmp->entry.mz.addr_64;
+ p_start1 = s->entry.mz.phys_addr;
+ p_start2 = tmp->entry.mz.phys_addr;
+ i_start1 = s->entry.mz.ioremap_addr;
+ i_start2 = tmp->entry.mz.ioremap_addr;
+ end1 = s->entry.mz.addr_64 + s->entry.mz.len;
+ end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
+
+ /* settle for minimum start address and maximum length */
+ s->entry.mz.addr_64 = RTE_MIN(start1, start2);
+ s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
+ s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
+ s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
+ /* the merged length is derived from the virtual address range */
+ s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
+ concat = 1;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
+ entry_dump(&s->entry);
+
+#endif
+ }
+ /* if segments not fully overlap, we have an error condition.
+ * adjacent segments can coexist.
+ */
+ else if (seg_overlapping > 0) {
+ RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+ entry_dump(&s->entry);
+ RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+ entry_dump(&tmp->entry);
+#endif
+ return -1;
+ }
+ /* leave the inner loop with j still pointing at the merged-in
+ * segment, so that it can be removed below */
+ if (concat)
+ break;
+ }
+ /* if we concatenated, remove segment at j */
+ if (concat) {
+ remove_segment(ms, tbl_len, j);
+ tbl_len--;
+ /* restart the scan from the while condition */
+ break;
+ }
+ }
+ }
+
+ return tbl_len;
+}
+
+/* Create the shared IVSHMEM config file, size it to hold a
+ * struct ivshmem_shared_config, map it read/write into ivshmem_config and
+ * zero it. The file is locked exclusively while it is set up; the lock is
+ * then changed to a shared one.
+ *
+ * Returns 0 on success. On failure returns -1, closes the file descriptor
+ * and leaves ivshmem_config set to NULL.
+ */
+static int
+create_shared_config(void)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	/* build ivshmem config file path */
+	rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+			internal_config.hugefile_prefix);
+
+	/* O_CREAT requires the mode argument, otherwise the permissions of
+	 * the new file are taken from whatever happens to be on the stack */
+	fd = open(path, O_CREAT | O_RDWR, 0600);
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
+		return -1;
+	}
+
+	/* try ex-locking first - if the file is locked, we have a problem */
+	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
+		RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	/* without the right file size, accessing the mapping would fault */
+	if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) {
+		RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (ivshmem_config == MAP_FAILED) {
+		/* callers test ivshmem_config against NULL, not MAP_FAILED */
+		ivshmem_config = NULL;
+		close(fd);
+		return -1;
+	}
+
+	memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
+
+	/* change the exclusive lock we got earlier to a shared lock */
+	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+		munmap(ivshmem_config, sizeof(struct ivshmem_shared_config));
+		ivshmem_config = NULL;
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return 0;
+}
+
+/* open shared config file and, if present, map the config.
+ * having no config file is not an error condition, as we later check if
+ * ivshmem_config is NULL (if it is, that means nothing was mapped).
+ *
+ * Returns 0 on success (with or without a mapped config). On failure
+ * returns -1, closes the file descriptor and leaves ivshmem_config set to
+ * NULL.
+ */
+static int
+open_shared_config(void)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	/* build ivshmem config file path */
+	rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+			internal_config.hugefile_prefix);
+
+	fd = open(path, O_RDONLY);
+
+	/* if the file doesn't exist, just return success */
+	if (fd < 0 && errno == ENOENT)
+		return 0;
+	/* else we have an error condition */
+	else if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
+				path, strerror(errno));
+		return -1;
+	}
+
+	/* try ex-locking first - if the lock *does* succeed, this means it's a
+	 * stray config file, so it should be deleted.
+	 */
+	if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
+
+		/* if we can't remove the file, something is wrong */
+		if (unlink(path) < 0) {
+			RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
+					strerror(errno));
+			flock(fd, LOCK_UN);
+			close(fd);
+			return -1;
+		}
+
+		/* release the lock */
+		flock(fd, LOCK_UN);
+		close(fd);
+
+		/* return success as having a stray config file is equivalent to not
+		 * having config file at all.
+		 */
+		return 0;
+	}
+
+	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+			PROT_READ, MAP_SHARED, fd, 0);
+
+	if (ivshmem_config == MAP_FAILED) {
+		/* callers test ivshmem_config against NULL, not MAP_FAILED */
+		ivshmem_config = NULL;
+		close(fd);
+		return -1;
+	}
+
+	/* place a shared lock on config file */
+	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+		munmap(ivshmem_config, sizeof(struct ivshmem_shared_config));
+		ivshmem_config = NULL;
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return 0;
+}
+
+/*
+ * This function does the following:
+ *
+ * 1) Builds a table of ivshmem_segments with proper offset alignment
+ * 2) Cleans up that table so that we don't have any overlapping or adjacent
+ *    memory segments
+ * 3) Creates memsegs from this table and maps them into memory.
+ *
+ * The number of resulting segments is stored in memseg_idx.
+ * Returns 0 on success and -1 on failure; all file descriptors opened here
+ * are closed on every path.
+ */
+static inline int
+map_all_segments(void)
+{
+	struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
+	struct ivshmem_pci_device * pci_dev;
+	struct rte_mem_config * mcfg;
+	struct ivshmem_segment * seg;
+	int fd = -1, fd_zero = -1;
+	int ret = -1;
+	unsigned i, j;
+	struct rte_memzone mz;
+	struct rte_memseg ms;
+	void * base_addr;
+	uint64_t align, len;
+	phys_addr_t ioremap_addr;
+
+	memset(ms_tbl, 0, sizeof(ms_tbl));
+	memset(&mz, 0, sizeof(struct rte_memzone));
+	memset(&ms, 0, sizeof(struct rte_memseg));
+
+	/* first, build a table of memsegs to map, to avoid failed mmaps due to
+	 * overlaps
+	 */
+	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
+		if (i == RTE_MAX_MEMSEG) {
+			RTE_LOG(ERR, EAL, "Too many segments requested!\n");
+			return -1;
+		}
+
+		seg = &ivshmem_config->segment[i];
+
+		/* copy segment to table */
+		memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
+
+		/* find ioremap addr; start from 0 for every segment so that a
+		 * segment without a matching device is detected instead of
+		 * silently inheriting the address of the previous segment */
+		ioremap_addr = 0;
+		for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
+			pci_dev = &ivshmem_config->pci_devs[j];
+			if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
+				ioremap_addr = pci_dev->ioremap_addr;
+				break;
+			}
+		}
+		if (ioremap_addr == 0) {
+			RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
+			return -1;
+		}
+
+		/* work out alignments */
+		align = seg->entry.mz.addr_64 -
+				RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
+		len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
+
+		/* save original alignments */
+		ms_tbl[i].align = align;
+
+		/* create a memory zone */
+		mz.addr_64 = seg->entry.mz.addr_64 - align;
+		mz.len = len;
+		mz.hugepage_sz = seg->entry.mz.hugepage_sz;
+		mz.phys_addr = seg->entry.mz.phys_addr - align;
+
+		/* find true physical address */
+		mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
+
+		ms_tbl[i].entry.offset = seg->entry.offset - align;
+
+		memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
+	}
+
+	/* clean up the segments */
+	memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
+
+	if (memseg_idx < 0)
+		return -1;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	fd_zero = open("/dev/zero", O_RDWR);
+
+	if (fd_zero < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
+		return -1;
+	}
+
+	/* create memsegs and put them into DPDK memory */
+	for (i = 0; i < (unsigned) memseg_idx; i++) {
+
+		seg = &ms_tbl[i];
+
+		ms.addr_64 = seg->entry.mz.addr_64;
+		ms.hugepage_sz = seg->entry.mz.hugepage_sz;
+		ms.len = seg->entry.mz.len;
+		ms.nchannel = rte_memory_get_nchannel();
+		ms.nrank = rte_memory_get_nrank();
+		ms.phys_addr = seg->entry.mz.phys_addr;
+		ms.ioremap_addr = seg->entry.mz.ioremap_addr;
+		ms.socket_id = seg->entry.mz.socket_id;
+
+		/* probe with /dev/zero whether the wanted address range is free */
+		base_addr = mmap(ms.addr, ms.len,
+				PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
+
+		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+			RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
+			goto out;
+		}
+
+		fd = open(seg->path, O_RDWR);
+
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
+					strerror(errno));
+			goto out;
+		}
+
+		/* drop the probe mapping and map the device in its place */
+		munmap(ms.addr, ms.len);
+
+		base_addr = mmap(ms.addr, ms.len,
+				PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+				seg->entry.offset);
+
+
+		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+			RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
+					"expected %p got %p (%s)\n", ms.addr, base_addr,
+					strerror(errno));
+			goto out;
+		}
+
+		RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
+				"offset 0x%" PRIx64 "\n",
+				ms.addr, ms.len, seg->entry.offset);
+
+		/* put the pointers back into their real positions using original
+		 * alignment */
+		ms.addr_64 += seg->align;
+		ms.phys_addr += seg->align;
+		ms.ioremap_addr += seg->align;
+		ms.len -= seg->align;
+
+		/* at this point, the rest of DPDK memory is not initialized, so we
+		 * expect memsegs to be empty */
+		memcpy(&mcfg->memseg[i], &ms,
+				sizeof(struct rte_memseg));
+		memcpy(&mcfg->free_memseg[i], &ms,
+				sizeof(struct rte_memseg));
+
+
+		/* adjust the free_memseg so that there's no free space left */
+		mcfg->free_memseg[i].ioremap_addr += mcfg->free_memseg[i].len;
+		mcfg->free_memseg[i].phys_addr += mcfg->free_memseg[i].len;
+		mcfg->free_memseg[i].addr_64 += mcfg->free_memseg[i].len;
+		mcfg->free_memseg[i].len = 0;
+
+		/* the mapping stays valid after the descriptor is closed */
+		close(fd);
+		fd = -1;
+
+		RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
+				ms.len);
+	}
+
+	ret = 0;
+
+out:
+	/* fd is only still open here when a mapping step failed */
+	if (fd >= 0)
+		close(fd);
+	close(fd_zero);
+
+	return ret;
+}
+
+/* this happens at a later stage, after general EAL memory initialization
+ *
+ * Registers every segment of the IVSHMEM config as a memzone in the memory
+ * config, fills in the ioremap address of each zone from the memseg that
+ * contains it, and inserts every memzone named with the ring prefix into
+ * the ring tailq.
+ *
+ * Returns 0 on success (also when there is nothing to do), -1 on failure.
+ */
+int
+rte_eal_ivshmem_obj_init(void)
+{
+	struct rte_ring_list* rings = NULL;
+	struct rte_mem_config * mcfg;
+	struct ivshmem_segment * seg;
+	struct rte_memzone * zone;
+	struct rte_ring * ring;
+	unsigned seg_no, memseg_no, zone_no;
+	uint64_t delta;
+
+	/* secondary process would not need any object discovery - it'll all
+	 * already be in shared config */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	/* nothing was created or mapped */
+	if (ivshmem_config == NULL)
+		return 0;
+
+	/* check that we have an initialised ring tail queue */
+	rings = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_RING, rte_ring_list);
+	if (rings == NULL) {
+		RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* create memzones */
+	for (seg_no = 0; seg_no < ivshmem_config->segment_idx &&
+			seg_no <= RTE_MAX_MEMZONE; seg_no++) {
+
+		seg = &ivshmem_config->segment[seg_no];
+
+		/* add memzone */
+		if (mcfg->memzone_idx == RTE_MAX_MEMZONE) {
+			RTE_LOG(ERR, EAL, "No more memory zones available!\n");
+			return -1;
+		}
+
+		zone = &mcfg->memzone[mcfg->memzone_idx];
+
+		RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
+				seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
+
+		memcpy(zone, &seg->entry.mz, sizeof(struct rte_memzone));
+
+		/* find the memseg the zone starts in */
+		for (memseg_no = 0; memseg_no < RTE_MAX_MEMSEG; memseg_no++) {
+			if (CONTAINS(mcfg->memseg[memseg_no], *zone))
+				break;
+		}
+		if (memseg_no == RTE_MAX_MEMSEG) {
+			RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
+			return -1;
+		}
+
+		/* ioremap address = memseg ioremap address + offset in memseg */
+		delta = zone->addr_64 - mcfg->memseg[memseg_no].addr_64;
+		zone->ioremap_addr = mcfg->memseg[memseg_no].ioremap_addr + delta;
+
+		mcfg->memzone_idx++;
+	}
+
+	/* find rings */
+	for (zone_no = 0; zone_no < mcfg->memzone_idx; zone_no++) {
+		zone = &mcfg->memzone[zone_no];
+
+		/* only memzones with a ring prefix are of interest */
+		if (strncmp(zone->name, RTE_RING_MZ_PREFIX,
+				sizeof(RTE_RING_MZ_PREFIX) - 1) == 0) {
+
+			ring = (struct rte_ring*) (zone->addr_64);
+
+			TAILQ_INSERT_TAIL(rings, ring, next);
+
+			RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", ring->name,
+					zone->addr);
+		}
+	}
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+	rte_memzone_dump();
+	rte_ring_list_dump();
+#endif
+
+	return 0;
+}
+
+/* initialize ivshmem structures
+ *
+ * A secondary process maps the existing shared config, if there is one.
+ * Any other process walks the PCI device list, and for every IVSHMEM device
+ * with DPDK metadata on BAR2 it records the device and its metadata entries
+ * in the shared config (created on first use). If a config is available
+ * afterwards, all segments are mapped.
+ *
+ * Returns 0 on success (also when no IVSHMEM configuration exists), -1 on
+ * failure.
+ */
+int rte_eal_ivshmem_init(void)
+{
+	struct rte_pci_device * dev;
+	struct rte_pci_resource * res;
+	struct ivshmem_pci_device * pci_dev;
+	int fd, ret;
+	char path[PATH_MAX];
+
+	/* initialize everything to 0 */
+	memset(path, 0, sizeof(path));
+	ivshmem_config = NULL;
+
+	pagesz = getpagesize();
+
+	RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+
+		if (open_shared_config() < 0) {
+			RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
+			return -1;
+		}
+	}
+	else {
+
+		TAILQ_FOREACH(dev, &device_list, next) {
+
+			if (!is_ivshmem_device(dev))
+				continue;
+
+			/* IVSHMEM memory is always on BAR2 */
+			res = &dev->mem_resource[2];
+
+			/* if we don't have a BAR2 */
+			if (res->len == 0)
+				continue;
+
+			/* construct pci device path */
+			rte_snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
+					dev->addr.domain, dev->addr.bus, dev->addr.devid,
+					dev->addr.function);
+
+			/* try to find memseg */
+			fd = open(path, O_RDWR);
+			if (fd < 0) {
+				RTE_LOG(ERR, EAL, "Could not open %s\n", path);
+				return -1;
+			}
+
+			/* check if it's a DPDK IVSHMEM device */
+			ret = has_ivshmem_metadata(fd, res->len);
+
+			/* failed to read */
+			if (ret < 0) {
+				RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
+						strerror(errno));
+				close(fd);
+				return -1;
+			}
+
+			/* not a DPDK device */
+			if (ret != 1) {
+				RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
+				close(fd);
+				continue;
+			}
+
+			/* config file creation is deferred until the first
+			 * DPDK device is found. then, it has to be created
+			 * only once. */
+			if (ivshmem_config == NULL &&
+					create_shared_config() < 0) {
+				RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
+				close(fd);
+				return -1;
+			}
+
+			/* check the device limit before reading the metadata, so that
+			 * no segments are recorded for a device that cannot be
+			 * recorded itself */
+			if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
+				RTE_LOG(WARNING, EAL,
+						"IVSHMEM PCI device limit exceeded. Increase "
+						"CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in "
+						"your config file.\n");
+				close(fd);
+				break;
+			}
+
+			if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
+				RTE_LOG(ERR, EAL, "Could not read metadata from"
+						" device %02x:%02x.%x!\n", dev->addr.bus,
+						dev->addr.devid, dev->addr.function);
+				close(fd);
+				return -1;
+			}
+
+			RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
+					dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+			pci_dev = &ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx];
+
+			pci_dev->ioremap_addr = res->phys_addr;
+			/* never pass the path itself as the format string */
+			rte_snprintf(pci_dev->path, sizeof(pci_dev->path), "%s", path);
+
+			ivshmem_config->pci_devs_idx++;
+
+			/* close the BAR fd */
+			close(fd);
+		}
+	}
+
+	/* ivshmem_config is not NULL only if config was created and/or mapped */
+	if (ivshmem_config) {
+		if (map_all_segments() < 0) {
+			RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
+			return -1;
+		}
+	}
+	else {
+		RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 3a1822e..6b78d89 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -113,6 +113,68 @@ static uint64_t baseaddr_offset;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
+/*
+ * Look up virtaddr in /proc/self/pagemap and return the page frame number
+ * found there multiplied by the standard page size.
+ *
+ * Returns (uint64_t) -1 if the pagemap file cannot be opened, positioned or
+ * read completely.
+ */
+static uint64_t
+get_physaddr(void * virtaddr)
+{
+	int fd;
+	uint64_t page, physaddr;
+	unsigned long virt_pfn;
+	int page_size;
+	off_t offset;
+	ssize_t nread;
+
+	/* standard page size */
+	page_size = getpagesize();
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+			__func__, strerror(errno));
+		return (uint64_t) -1;
+	}
+
+	/* the file holds one 64-bit entry per virtual page */
+	virt_pfn = (unsigned long)virtaddr / page_size;
+	offset = sizeof(uint64_t) * virt_pfn;
+	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
+		RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+			__func__, strerror(errno));
+		close(fd);
+		return (uint64_t) -1;
+	}
+
+	nread = read(fd, &page, sizeof(uint64_t));
+	if (nread < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+			__func__, strerror(errno));
+		close(fd);
+		return (uint64_t) -1;
+	}
+	/* a short read (including end of file) leaves 'page' incomplete */
+	if (nread != (ssize_t) sizeof(uint64_t)) {
+		RTE_LOG(ERR, EAL, "%s(): short read from /proc/self/pagemap\n",
+			__func__);
+		close(fd);
+		return (uint64_t) -1;
+	}
+
+	/*
+	 * the pfn (page frame number) are bits 0-54 (see
+	 * pagemap.txt in linux Documentation)
+	 */
+	physaddr = ((page & 0x7fffffffffffffULL) * page_size);
+	close(fd);
+	return physaddr;
+}
+
+/*
+ * Fill the physaddr field of the first hpi->num_pages[0] entries of
+ * hugepg_tbl, using get_physaddr() on the original mapping address of each
+ * page. Returns 0 on success, -1 as soon as one lookup fails.
+ */
+static int
+find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned page_no = 0;
+
+	while (page_no < hpi->num_pages[0]) {
+		const phys_addr_t pa = get_physaddr(hugepg_tbl[page_no].orig_va);
+
+		if (pa == (phys_addr_t) -1)
+			return -1;
+
+		hugepg_tbl[page_no].physaddr = pa;
+		page_no++;
+	}
+
+	return 0;
+}
+
/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
@@ -209,7 +271,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
* map continguous physical blocks in contiguous virtual blocks.
*/
static int
-map_all_hugepages(struct hugepage *hugepg_tbl,
+map_all_hugepages(struct hugepage_file *hugepg_tbl,
struct hugepage_info *hpi, int orig)
{
int fd;
@@ -218,15 +280,25 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
void *vma_addr = NULL;
size_t vma_len = 0;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ RTE_SET_USED(vma_len);
+#endif
+
for (i = 0; i < hpi->num_pages[0]; i++) {
size_t hugepage_sz = hpi->hugepage_sz;
if (orig) {
hugepg_tbl[i].file_id = i;
hugepg_tbl[i].size = hugepage_sz;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
+ sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+ hugepg_tbl[i].file_id);
+#else
eal_get_hugefile_path(hugepg_tbl[i].filepath,
sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
hugepg_tbl[i].file_id);
+#endif
hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
}
#ifndef RTE_ARCH_X86_64
@@ -239,6 +311,8 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
continue;
}
#endif
+
+#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
else if (vma_len == 0) {
unsigned j, num_pages;
@@ -260,6 +334,7 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
if (vma_addr == NULL)
vma_len = hugepage_sz;
}
+#endif
/* try to create hugepage file */
fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
@@ -302,77 +377,189 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
return 0;
}
-/* Unmap all hugepages from original mapping. */
-static int
-unmap_all_hugepages_orig(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
-{
- unsigned i;
- for (i = 0; i < hpi->num_pages[0]; i++) {
- if (hugepg_tbl[i].orig_va) {
- munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
- hugepg_tbl[i].orig_va = NULL;
- }
- }
- return 0;
-}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/*
- * For each hugepage in hugepg_tbl, fill the physaddr value. We find
- * it by browsing the /proc/self/pagemap special file.
+ * Remaps all hugepages into single file segments
+ *
+ * Walks hugepg_tbl (expected to be sorted by physical address), groups
+ * physically contiguous pages into runs and maps each run from one file at
+ * a single virtual address. The original per-page mapping is unmapped and
+ * its file unlinked as each page is absorbed into the run.
+ *
+ * On success the first N entries of hugepg_tbl describe the N resulting
+ * segments (file_id, filepath, final_va, physaddr, repeated, socket_id),
+ * the remaining entries are zeroed and N is returned.
+ * Returns -1 on any failure.
*/
static int
-find_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
int fd;
- unsigned i;
- uint64_t page;
- unsigned long virt_pfn;
- int page_size;
+ unsigned i = 0, j, num_pages, page_idx = 0;
+ void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
+ size_t vma_len = 0;
+ size_t hugepage_sz = hpi->hugepage_sz;
+ size_t total_size, offset;
+ char filepath[MAX_HUGEPAGE_PATH];
+ phys_addr_t physaddr;
+ int socket;
- /* standard page size */
- page_size = getpagesize();
+ while (i < hpi->num_pages[0]) {
- fd = open("/proc/self/pagemap", O_RDONLY);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
- __func__, strerror(errno));
- return -1;
- }
+#ifndef RTE_ARCH_X86_64
+ /* for 32-bit systems, don't remap 1G pages, just reuse original
+ * map address as final map address.
+ */
+ if (hugepage_sz == RTE_PGSIZE_1G){
+ hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+ hugepg_tbl[i].orig_va = NULL;
+ i++;
+ continue;
+ }
+#endif
- for (i = 0; i < hpi->num_pages[0]; i++) {
- off_t offset;
- virt_pfn = (unsigned long)hugepg_tbl[i].orig_va /
- page_size;
- offset = sizeof(uint64_t) * virt_pfn;
- if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
- RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
- __func__, strerror(errno));
- close(fd);
+ /* reserve a virtual area for next contiguous
+ * physical block: count the number of
+ * contiguous physical pages. */
+ for (j = i+1; j < hpi->num_pages[0] ; j++) {
+ if (hugepg_tbl[j].physaddr != hugepg_tbl[j-1].physaddr + hugepage_sz)
+ break;
+ }
+ num_pages = j - i;
+ vma_len = num_pages * hugepage_sz;
+
+ socket = hugepg_tbl[i].socket_id;
+
+ /* get the biggest virtual memory area up to
+ * vma_len. If it fails, vma_addr is NULL, so
+ * let the kernel provide the address. */
+ vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+
+ /* If we can't find a big enough virtual area, work out how many pages
+ * we are going to get */
+ if (vma_addr == NULL) {
+ /* no address hint: map a single page wherever the kernel places
+ * it, and keep the bookkeeping consistent with that one page */
+ num_pages = 1;
+ vma_len = hugepage_sz;
+ j = i + 1;
+ }
+ else if (vma_len != num_pages * hugepage_sz) {
+ num_pages = vma_len / hugepage_sz;
+ j = i + num_pages;
+
+ }
+
+ hugepg_tbl[page_idx].file_id = page_idx;
+ eal_get_hugefile_path(filepath,
+ sizeof(filepath),
+ hpi->hugedir,
+ hugepg_tbl[page_idx].file_id);
+
+ /* try to create hugepage file */
+ fd = open(filepath, O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
return -1;
}
- if (read(fd, &page, sizeof(uint64_t)) < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
- __func__, strerror(errno));
+
+ total_size = 0;
+ for (;i < j; i++) {
+
+ /* unmap current segment */
+ if (total_size > 0)
+ munmap(vma_addr, total_size);
+
+ /* unmap original page */
+ munmap(hugepg_tbl[i].orig_va, hugepage_sz);
+ unlink(hugepg_tbl[i].filepath);
+
+ total_size += hugepage_sz;
+
+ old_addr = vma_addr;
+
+ /* map new, bigger segment */
+ vma_addr = mmap(vma_addr, total_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ /* the returned address only has to match when a hint was given;
+ * with a NULL hint any address chosen by the kernel is valid */
+ if (vma_addr == MAP_FAILED ||
+ (old_addr != NULL && vma_addr != old_addr)) {
+ RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ /* touch the page. this is needed because kernel postpones mapping
+ * creation until the first page fault. with this, we pin down
+ * the page and it is marked as used and gets into process' pagemap.
+ */
+ for (offset = 0; offset < total_size; offset += hugepage_sz)
+ *((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset));
+ }
+
+ /* set shared flock on the file. */
+ if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+ __func__, strerror(errno));
close(fd);
return -1;
}
- /*
- * the pfn (page frame number) are bits 0-54 (see
- * pagemap.txt in linux Documentation)
+ rte_snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
+ filepath);
+
+ physaddr = get_physaddr(vma_addr);
+
+ if (physaddr == (phys_addr_t) -1) {
+ /* fd is still open on this path: release it before bailing out */
+ close(fd);
+ return -1;
+ }
+
+ hugepg_tbl[page_idx].final_va = vma_addr;
+
+ hugepg_tbl[page_idx].physaddr = physaddr;
+
+ hugepg_tbl[page_idx].repeated = num_pages;
+
+ hugepg_tbl[page_idx].socket_id = socket;
+
+ close(fd);
+
+ /* verify the memory segment - that is, check that every VA corresponds
+ * to the physical address we expect to see
*/
- hugepg_tbl[i].physaddr = ((page & 0x7fffffffffffffULL) * page_size);
+ for (offset = 0; offset < vma_len; offset += hugepage_sz) {
+ uint64_t expected_physaddr;
+
+ expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
+ page_addr = RTE_PTR_ADD(vma_addr, offset);
+ physaddr = get_physaddr(page_addr);
+
+ if (physaddr != expected_physaddr) {
+ /* offset is a size_t: widen it so the argument matches the
+ * PRIx64 conversion on 32-bit builds as well */
+ RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
+ "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
+ " (expected 0x%" PRIx64 ")\n",
+ page_addr, (uint64_t) offset, (uint64_t) physaddr, expected_physaddr);
+ return -1;
+ }
+ }
+
+ /* zero out the whole segment */
+ memset(hugepg_tbl[page_idx].final_va, 0, total_size);
+
+ page_idx++;
}
- close(fd);
- return 0;
+
+ /* zero out the rest */
+ memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
+ return page_idx;
}
+#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
+
+/*
+ * Drop the first-pass ("orig") mapping of every hugepage in the table.
+ *
+ * Visits the first hpi->num_pages[0] entries of hugepg_tbl; each entry that
+ * still has an orig_va is unmapped (hpi->hugepage_sz bytes) and its orig_va
+ * reset to NULL. Always returns 0.
+ */
+static int
+unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned idx;
+
+	for (idx = 0; idx < hpi->num_pages[0]; idx++) {
+		struct hugepage_file *hp = &hugepg_tbl[idx];
+
+		/* nothing mapped for this entry */
+		if (!hp->orig_va)
+			continue;
+
+		munmap(hp->orig_va, hpi->hugepage_sz);
+		hp->orig_va = NULL;
+	}
+
+	return 0;
+}
+#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
/*
* Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
* page.
*/
static int
-find_numasocket(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
int socket_id;
char *end, *nodestr;
@@ -455,12 +642,12 @@ error:
* is only done at init time.
*/
static int
-sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
unsigned i, j;
int smallest_idx;
uint64_t smallest_addr;
- struct hugepage tmp;
+ struct hugepage_file tmp;
for (i = 0; i < hpi->num_pages[0]; i++) {
smallest_addr = 0;
@@ -486,10 +673,10 @@ sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
}
/* swap the 2 entries in the table */
- memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage));
+ memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage_file));
memcpy(&hugepg_tbl[smallest_idx], &hugepg_tbl[i],
- sizeof(struct hugepage));
- memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage));
+ sizeof(struct hugepage_file));
+ memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file));
}
return 0;
}
@@ -519,8 +706,8 @@ create_shared_memory(const char *filename, const size_t mem_size)
* destination is typically the shared memory.
*/
static int
-copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
- const struct hugepage * src, int src_size)
+copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
+ const struct hugepage_file * src, int src_size)
{
int src_pos, dst_pos = 0;
@@ -529,7 +716,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
/* error on overflow attempt */
if (dst_pos == dest_size)
return -1;
- memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage));
+ memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
dst_pos++;
}
}
@@ -541,7 +728,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
* ALL hugepages (not just those we need), additional unmapping needs to be done.
*/
static int
-unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
+unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
struct hugepage_info *hpi,
unsigned num_hp_info)
{
@@ -556,9 +743,16 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
for (size = 0; size < num_hp_info; size++) {
for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
unsigned pages_found = 0;
+
/* traverse until we have unmapped all the unused pages */
for (page = 0; page < nrpages; page++) {
- struct hugepage *hp = &hugepg_tbl[page];
+ struct hugepage_file *hp = &hugepg_tbl[page];
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ /* if this page was already cleared */
+ if (hp->final_va == NULL)
+ continue;
+#endif
/* find a page that matches the criteria */
if ((hp->size == hpi[size].hugepage_sz) &&
@@ -566,17 +760,67 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
/* if we skipped enough pages, unmap the rest */
if (pages_found == hpi[size].num_pages[socket]) {
- munmap(hp->final_va, hp->size);
+ uint64_t unmap_len;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ unmap_len = hp->size * hp->repeated;
+#else
+ unmap_len = hp->size;
+#endif
+
+ /* get start addr and len of the remaining segment */
+ munmap(hp->final_va, (size_t) unmap_len);
+
hp->final_va = NULL;
- if (remove(hp->filepath) == -1) {
+ if (unlink(hp->filepath) == -1) {
RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
__func__, hp->filepath, strerror(errno));
return -1;
}
}
- /* lock the page and skip */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ /* else, check how much do we need to map */
+ else {
+ int nr_pg_left =
+ hpi[size].num_pages[socket] - pages_found;
+
+ /* if we need enough memory to fit into the segment */
+ if (hp->repeated <= nr_pg_left) {
+ pages_found += hp->repeated;
+ }
+ /* truncate the segment */
+ else {
+ uint64_t final_size = nr_pg_left * hp->size;
+ uint64_t seg_size = hp->repeated * hp->size;
+
+ void * unmap_va = RTE_PTR_ADD(hp->final_va,
+ final_size);
+ int fd;
+
+ munmap(unmap_va, seg_size - final_size);
+
+ fd = open(hp->filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ hp->filepath, strerror(errno));
+ return -1;
+ }
+ if (ftruncate(fd, final_size) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
+ hp->filepath, strerror(errno));
+ return -1;
+ }
+ close(fd);
+
+ pages_found += nr_pg_left;
+ hp->repeated = nr_pg_left;
+ }
+ }
+#else
+ /* else, lock the page and skip */
else
pages_found++;
+#endif
} /* match page */
} /* foreach page */
@@ -712,15 +956,18 @@ static int
rte_eal_hugepage_init(void)
{
struct rte_mem_config *mcfg;
- struct hugepage *hugepage, *tmp_hp = NULL;
+ struct hugepage_file *hugepage, *tmp_hp = NULL;
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
uint64_t memory[RTE_MAX_NUMA_NODES];
unsigned hp_offset;
int i, j, new_memseg;
- int nrpages, total_pages = 0;
+ int nr_hugefiles, nr_hugepages = 0;
void *addr;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ int new_pages_count[MAX_HUGEPAGE_SIZES];
+#endif
memset(used_hp, 0, sizeof(used_hp));
@@ -744,7 +991,7 @@ rte_eal_hugepage_init(void)
/* meanwhile, also initialize used_hp hugepage sizes in used_hp */
used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
- total_pages += internal_config.hugepage_info[i].num_pages[0];
+ nr_hugepages += internal_config.hugepage_info[i].num_pages[0];
}
/*
@@ -753,11 +1000,11 @@ rte_eal_hugepage_init(void)
* processing done on these pages, shared memory will be created
* at a later stage.
*/
- tmp_hp = malloc(total_pages * sizeof(struct hugepage));
+ tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
if (tmp_hp == NULL)
goto fail;
- memset(tmp_hp, 0, total_pages * sizeof(struct hugepage));
+ memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
hp_offset = 0; /* where we start the current page size entries */
@@ -772,7 +1019,7 @@ rte_eal_hugepage_init(void)
*/
hpi = &internal_config.hugepage_info[i];
- if (hpi->num_pages == 0)
+ if (hpi->num_pages[0] == 0)
continue;
/* map all hugepages available */
@@ -783,7 +1030,7 @@ rte_eal_hugepage_init(void)
}
/* find physical addresses and sockets for each hugepage */
- if (find_physaddr(&tmp_hp[hp_offset], hpi) < 0){
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
(unsigned)(hpi->hugepage_sz / 0x100000));
goto fail;
@@ -798,6 +1045,18 @@ rte_eal_hugepage_init(void)
if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0)
goto fail;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ /* remap all hugepages into single file segments */
+ new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
+ if (new_pages_count[i] < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ /* we have processed a num of hugepages of this size, so inc offset */
+ hp_offset += new_pages_count[i];
+#else
/* remap all hugepages */
if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
@@ -811,22 +1070,38 @@ rte_eal_hugepage_init(void)
/* we have processed a num of hugepages of this size, so inc offset */
hp_offset += hpi->num_pages[0];
+#endif
}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ nr_hugefiles = 0;
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+ nr_hugefiles += new_pages_count[i];
+ }
+#else
+ nr_hugefiles = nr_hugepages;
+#endif
+
+
/* clean out the numbers of pages */
for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
internal_config.hugepage_info[i].num_pages[j] = 0;
/* get hugepages for each socket */
- for (i = 0; i < total_pages; i++) {
+ for (i = 0; i < nr_hugefiles; i++) {
int socket = tmp_hp[i].socket_id;
/* find a hugepage info with right size and increment num_pages */
for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) {
if (tmp_hp[i].size ==
internal_config.hugepage_info[j].hugepage_sz) {
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ internal_config.hugepage_info[j].num_pages[socket] +=
+ tmp_hp[i].repeated;
+#else
internal_config.hugepage_info[j].num_pages[socket]++;
+#endif
}
}
}
@@ -836,12 +1111,12 @@ rte_eal_hugepage_init(void)
memory[i] = internal_config.socket_mem[i];
/* calculate final number of pages */
- nrpages = calc_num_pages_per_socket(memory,
+ nr_hugepages = calc_num_pages_per_socket(memory,
internal_config.hugepage_info, used_hp,
internal_config.num_hugepage_sizes);
/* error if not enough memory available */
- if (nrpages < 0)
+ if (nr_hugepages < 0)
goto fail;
/* reporting in! */
@@ -861,12 +1136,13 @@ rte_eal_hugepage_init(void)
/* create shared memory */
hugepage = create_shared_memory(eal_hugepage_info_path(),
- nrpages * sizeof(struct hugepage));
+ nr_hugefiles * sizeof(struct hugepage_file));
if (hugepage == NULL) {
RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
goto fail;
}
+ memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
/*
* unmap pages that we won't need (looks at used_hp).
@@ -883,8 +1159,8 @@ rte_eal_hugepage_init(void)
* this procedure only copies those hugepages that have final_va
* not NULL. has overflow protection.
*/
- if (copy_hugepages_to_shared_mem(hugepage, nrpages,
- tmp_hp, total_pages) < 0) {
+ if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
+ tmp_hp, nr_hugefiles) < 0) {
RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
goto fail;
}
@@ -893,9 +1169,16 @@ rte_eal_hugepage_init(void)
free(tmp_hp);
tmp_hp = NULL;
- memset(mcfg->memseg, 0, sizeof(mcfg->memseg));
- j = -1;
- for (i = 0; i < nrpages; i++) {
+ /* find earliest free memseg - this is needed because in case of IVSHMEM,
+ * segments might have already been initialized */
+ for (j = 0; j < RTE_MAX_MEMSEG; j++)
+ if (mcfg->memseg[j].addr == NULL) {
+ /* move to previous segment and exit loop */
+ j--;
+ break;
+ }
+
+ for (i = 0; i < nr_hugefiles; i++) {
new_memseg = 0;
/* if this is a new section, create a new memseg */
@@ -919,7 +1202,11 @@ rte_eal_hugepage_init(void)
mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
+#else
mcfg->memseg[j].len = hugepage[i].size;
+#endif
mcfg->memseg[j].socket_id = hugepage[i].socket_id;
mcfg->memseg[j].hugepage_sz = hugepage[i].size;
}
@@ -930,21 +1217,19 @@ rte_eal_hugepage_init(void)
hugepage[i].memseg_id = j;
}
- if (i < nrpages) {
+ if (i < nr_hugefiles) {
RTE_LOG(ERR, EAL, "Can only reserve %d pages "
"from %d requested\n"
"Current %s=%d is not enough\n"
"Please either increase it or request less amount "
"of memory.\n",
- i, nrpages, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
+ i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
RTE_MAX_MEMSEG);
return (-ENOMEM);
}
-
return 0;
-
fail:
if (tmp_hp)
free(tmp_hp);
@@ -973,7 +1258,7 @@ static int
rte_eal_hugepage_attach(void)
{
const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- const struct hugepage *hp = NULL;
+ const struct hugepage_file *hp = NULL;
unsigned num_hp = 0;
unsigned i, s = 0; /* s used to track the segment number */
off_t size;
@@ -1008,6 +1293,15 @@ rte_eal_hugepage_attach(void)
if (mcfg->memseg[s].len == 0)
break;
+#ifdef RTE_LIBRTE_IVSHMEM
+ /*
+ * if segment has ioremap address set, it's an IVSHMEM segment and
+ * doesn't need mapping as it was already mapped earlier
+ */
+ if (mcfg->memseg[s].ioremap_addr != 0)
+ continue;
+#endif
+
/*
* fdzero is mmapped to get a contiguous block of virtual
* addresses of the appropriate memseg size.
@@ -1018,9 +1312,9 @@ rte_eal_hugepage_attach(void)
if (base_addr == MAP_FAILED ||
base_addr != mcfg->memseg[s].addr) {
RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in /dev/zero to requested address [%p]\n",
+ "in /dev/zero to requested address [%p]: '%s'\n",
(unsigned long long)mcfg->memseg[s].len,
- mcfg->memseg[s].addr);
+ mcfg->memseg[s].addr, strerror(errno));
if (aslr_enabled() > 0) {
RTE_LOG(ERR, EAL, "It is recommended to "
"disable ASLR in the kernel "
@@ -1038,14 +1332,24 @@ rte_eal_hugepage_attach(void)
goto error;
}
- num_hp = size / sizeof(struct hugepage);
- RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);
+ num_hp = size / sizeof(struct hugepage_file);
+ RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
s = 0;
while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
void *addr, *base_addr;
uintptr_t offset = 0;
-
+ size_t mapping_size;
+#ifdef RTE_LIBRTE_IVSHMEM
+ /*
+ * if segment has ioremap address set, it's an IVSHMEM segment and
+ * doesn't need mapping as it was already mapped earlier
+ */
+ if (mcfg->memseg[s].ioremap_addr != 0) {
+ s++;
+ continue;
+ }
+#endif
/*
* free previously mapped memory so we can map the
* hugepages into the space
@@ -1064,16 +1368,22 @@ rte_eal_hugepage_attach(void)
hp[i].filepath);
goto error;
}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ mapping_size = hp[i].size * hp[i].repeated;
+#else
+ mapping_size = hp[i].size;
+#endif
addr = mmap(RTE_PTR_ADD(base_addr, offset),
- hp[i].size, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_FIXED, fd, 0);
+ mapping_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
close(fd); /* close file both on success and on failure */
- if (addr == MAP_FAILED) {
+ if (addr == MAP_FAILED ||
+ addr != RTE_PTR_ADD(base_addr, offset)) {
RTE_LOG(ERR, EAL, "Could not mmap %s\n",
hp[i].filepath);
goto error;
}
- offset+=hp[i].size;
+ offset+=mapping_size;
}
}
RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h b/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h
index 7ffd5cd..034e58d 100644
--- a/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h
+++ b/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h
@@ -46,6 +46,8 @@
#include <stdint.h>
#include <limits.h>
#include <unistd.h>
+#include <stdlib.h>
+
#include <rte_string_fns.h>
#include "eal_internal_cfg.h"
@@ -84,6 +86,7 @@ eal_hugepage_info_path(void)
/** String format for hugepage map files. */
#define HUGEFILE_FMT "%s/%smap_%d"
+#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
static inline const char *
eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
@@ -94,6 +97,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
return buffer;
}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+/*
+ * Build the path of a temporary hugepage map file.
+ *
+ * Formats TEMP_HUGEFILE_FMT with hugedir, the configured hugefile prefix and
+ * f_id into buffer (at most buflen bytes), forces NUL termination on the
+ * last byte of the buffer and returns buffer.
+ */
+static inline const char *
+eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+	const char *prefix = internal_config.hugefile_prefix;
+	char *last = &buffer[buflen - 1];
+
+	rte_snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, prefix, f_id);
+	*last = '\0';
+
+	return buffer;
+}
+#endif
+
/** define the default filename prefix for the %s values above */
#define HUGEFILE_PREFIX_DEFAULT "rte"
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
index 82dd641..064cdb0 100644
--- a/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
+++ b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
@@ -35,6 +35,8 @@
#define RTE_LINUXAPP_HUGEPAGES_H_
#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
#define MAX_HUGEPAGE_PATH PATH_MAX
@@ -42,7 +44,7 @@
* Structure used to store informations about hugepages that we mapped
* through the files in hugetlbfs.
*/
-struct hugepage {
+struct hugepage_file {
void *orig_va; /**< virtual addr of first mmap() */
void *final_va; /**< virtual addr of 2nd mmap() */
uint64_t physaddr; /**< physical addr */
@@ -50,6 +52,9 @@ struct hugepage {
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
int memseg_id; /**< the memory segment to which page belongs */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+ int repeated; /**< number of times the page size is repeated */
+#endif
char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
};
diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
new file mode 100644
index 0000000..c94f926
--- /dev/null
+++ b/lib/librte_ivshmem/Makefile
@@ -0,0 +1,48 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ivshmem.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
+
+# this lib depends on librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c
new file mode 100644
index 0000000..d62d016
--- /dev/null
+++ b/lib/librte_ivshmem/rte_ivshmem.c
@@ -0,0 +1,884 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include <rte_eal_memconfig.h>
+#include <rte_memory.h>
+#include <rte_ivshmem.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_spinlock.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+
+#include "rte_ivshmem.h"
+
+#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
+#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
+#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
+#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
+#define IVSHMEM_MAX_PAGES (1 << 12)
+/* true when y's phys_addr equals x's phys_addr plus x's len, i.e. y starts
+ * exactly where x ends; both arguments are evaluated as struct values */
+#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
+/* sizeof(struct rte_ivshmem_metadata) passed through RTE_ALIGN_CEIL with the
+ * file-scope variable pagesz as alignment; pagesz has to hold a valid value
+ * before this is evaluated (where it is set is not visible here) */
+#define METADATA_SIZE_ALIGNED \
+ (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
+/*
+ * Parse one hexadecimal number from the string cursor `in` into `addr`.
+ *
+ * The number must be followed by the delimiter character `dlm`. On success
+ * `in` is advanced to the character after the delimiter. If strtoull()
+ * reports an error through errno, or the character after the number is not
+ * `dlm`, the message `err` is logged and control jumps to a label named
+ * `error`, which the enclosing function must provide.
+ *
+ * `in` is evaluated more than once, so it must be a plain lvalue.
+ * NOTE(review): expands to a bare brace block rather than do { } while (0),
+ * so it is not safe as the sole body of an un-braced if/else.
+ */
+#define GET_PAGEMAP_ADDR(in,addr,dlm,err) \
+{ \
+ char *end; \
+ errno = 0; \
+ addr = strtoull((in), &end, 16); \
+ if (errno != 0 || *end != (dlm)) { \
+ RTE_LOG(ERR, EAL, err); \
+ goto error; \
+ } \
+ (in) = end + 1; \
+}
+
+static int pagesz;
+
+/*
+ * Location of a piece of memory inside a file mapped by this process.
+ * get_hugefile_by_virt_addr() fills filepath and offset; it does not set len.
+ */
+struct memseg_cache_entry {
+ /* path of the mapped file, as listed in /proc/self/maps */
+ char filepath[PATH_MAX];
+ /* distance of the looked-up address from the start of its mapping */
+ uint64_t offset;
+ /* presumably the length in bytes of the cached region - TODO confirm,
+ * the code that sets it is not visible here */
+ uint64_t len;
+};
+
+/*
+ * Per-metadata-file state; one element of ivshmem_global_config.
+ */
+struct ivshmem_config {
+ /* metadata owned by this slot; get_config_by_name() treats a NULL
+ * pointer here as the end of the used slots */
+ struct rte_ivshmem_metadata * metadata;
+ struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
+ /**< account for multiple files per segment case */
+ /* NOTE(review): presumably a file lock on the metadata file - confirm
+ * against the code that uses it */
+ struct flock lock;
+ /* NOTE(review): presumably guards concurrent access to this slot -
+ * confirm against the code that uses it */
+ rte_spinlock_t sl;
+};
+
+static struct ivshmem_config
+ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
+
+static rte_spinlock_t global_cfg_sl;
+
+/*
+ * Find the entry of ivshmem_global_config whose metadata has the given name.
+ *
+ * Entries are scanned in order and the scan stops at the first entry with no
+ * metadata attached. Names are compared over at most IVSHMEM_NAME_LEN bytes.
+ * Returns the matching entry, or NULL when none matches.
+ */
+static struct ivshmem_config *
+get_config_by_name(const char * name)
+{
+	struct ivshmem_config * slot = ivshmem_global_config;
+	struct ivshmem_config * const slots_end =
+			ivshmem_global_config + RTE_DIM(ivshmem_global_config);
+
+	/* a slot without metadata ends the search */
+	for (; slot != slots_end && slot->metadata != NULL; slot++) {
+		if (strncmp(name, slot->metadata->name, IVSHMEM_NAME_LEN) == 0)
+			return slot;
+	}
+
+	return NULL;
+}
+
+/*
+ * Tell whether the address ranges of two memzones intersect.
+ *
+ * Each range is taken as [addr_64, addr_64 + len). Returns 1 when the start
+ * of either range lies inside the other range, 0 otherwise.
+ */
+static int
+overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
+{
+	const uint64_t lo_a = s1->addr_64;
+	const uint64_t hi_a = s1->addr_64 + s1->len;
+	const uint64_t lo_b = s2->addr_64;
+	const uint64_t hi_b = s2->addr_64 + s2->len;
+
+	return (lo_a >= lo_b && lo_a < hi_b) ||
+			(lo_b >= lo_a && lo_b < hi_a);
+}
+
+/*
+ * Find the memzone that starts exactly at the given virtual address.
+ *
+ * Scans all RTE_MAX_MEMZONE entries of the shared memory configuration and
+ * returns the first one whose addr_64 equals addr, or NULL if there is none.
+ */
+static struct rte_memzone *
+get_memzone_by_addr(const void * addr)
+{
+	struct rte_mem_config * mcfg = rte_eal_get_configuration()->mem_config;
+	int idx;
+
+	for (idx = 0; idx < RTE_MAX_MEMZONE; idx++) {
+		struct rte_memzone * candidate = &mcfg->memzone[idx];
+
+		if (candidate->addr_64 == (uint64_t) addr)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * qsort() comparator for struct rte_ivshmem_metadata_entry.
+ *
+ * Entries whose memzone address is unset sort after every allocated entry;
+ * allocated entries are ordered by ascending physical address.
+ *
+ * Returns a negative value, zero or a positive value when a sorts before,
+ * equal to or after b. A proper three-way result is required by qsort();
+ * returning only 0/1 would report "equal" for a < b while reporting
+ * "greater" for the swapped pair, which leaves the resulting order
+ * unspecified.
+ */
+static int
+entry_compare(const void * a, const void * b)
+{
+	const struct rte_ivshmem_metadata_entry * e1 =
+			(const struct rte_ivshmem_metadata_entry*) a;
+	const struct rte_ivshmem_metadata_entry * e2 =
+			(const struct rte_ivshmem_metadata_entry*) b;
+
+	/* move unallocated zones to the end */
+	if (e1->mz.addr == NULL && e2->mz.addr == NULL)
+		return 0;
+	if (e1->mz.addr == NULL)
+		return 1;
+	if (e2->mz.addr == NULL)
+		return -1;
+
+	/* both allocated: ascending physical address */
+	if (e1->mz.phys_addr < e2->mz.phys_addr)
+		return -1;
+	if (e1->mz.phys_addr > e2->mz.phys_addr)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Fills hugepage cache entry for a given start virt_addr.
+ *
+ * Scans /proc/self/maps for the mapping that contains virt_addr and carries
+ * a path. On success the path is stored in e->filepath and the distance of
+ * virt_addr from the start of that mapping in e->offset; e->len is not
+ * touched. e->filepath is cleared first, so it is empty on failure.
+ *
+ * Returns 0 on success. Returns -1 if the maps file cannot be opened, a line
+ * cannot be parsed, no mapping with a path contains virt_addr, or the path
+ * does not fit into e->filepath.
+ */
+static int
+get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
+{
+	uint64_t start_addr, end_addr;
+	char *start,*path_end;
+	char buf[PATH_MAX*2];
+	size_t path_len;
+	FILE *f;
+
+	start = NULL;
+	path_end = NULL;
+	start_addr = 0;
+
+	memset(e->filepath, 0, sizeof(e->filepath));
+
+	/* open /proc/self/maps */
+	f = fopen("/proc/self/maps", "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
+		return -1;
+	}
+
+	/* parse maps */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+
+		/* get endptr to end of start addr */
+		start = buf;
+
+		GET_PAGEMAP_ADDR(start,start_addr,'-',
+				"Cannot find start address in maps!\n");
+
+		/* if start address is bigger than our address, skip */
+		if (start_addr > virt_addr)
+			continue;
+
+		GET_PAGEMAP_ADDR(start,end_addr,' ',
+				"Cannot find end address in maps!\n");
+
+		/* if end address is less than our address, skip */
+		if (end_addr <= virt_addr)
+			continue;
+
+		/* find where the path starts */
+		start = strstr(start, "/");
+
+		if (start == NULL)
+			continue;
+
+		/* at this point, we know that this is our map.
+		 * now let's find the file */
+		path_end = strstr(start, "\n");
+		break;
+	}
+
+	if (path_end == NULL) {
+		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
+		goto error;
+	}
+
+	/* the line buffer is larger than e->filepath, so the path length has to
+	 * be checked against the destination before copying */
+	path_len = RTE_PTR_DIFF(path_end, start);
+	if (path_len >= sizeof(e->filepath)) {
+		RTE_LOG(ERR, EAL, "Hugefile path too long!\n");
+		goto error;
+	}
+
+	/* calculate offset and copy the file path (without the newline) */
+	rte_snprintf(e->filepath, path_len + 1, "%s", start);
+
+	e->offset = virt_addr - start_addr;
+
+	fclose(f);
+
+	return 0;
+error:
+	fclose(f);
+	return -1;
+}
+
+/*
+ * Rebuild the exported layout of an IVSHMEM config from its metadata entries.
+ *
+ * All work is done on heap copies of the entry table and of the memseg cache;
+ * the live config is only overwritten after every pass has succeeded:
+ * 1. Sort the entries (entry_compare) and collect every hugepage they touch.
+ * 2. Order the pages by length, largest first. Pages of equal length keep
+ *    their relative order so that contiguous runs are not broken up.
+ * 3. Write into each entry its offset within the concatenated page list.
+ * 4. Resolve each page to its backing hugepage file and merge pages that are
+ *    contiguous within the same file into a single memseg cache entry.
+ *
+ * Returns 0 on success and -1 on failure (unknown config, allocation failure,
+ * page table overflow or a page whose hugepage file cannot be resolved).
+ */
+static int
+build_config(struct rte_ivshmem_metadata * metadata)
+{
+	struct rte_ivshmem_metadata_entry * e_local;
+	struct memseg_cache_entry * ms_local;
+	struct rte_memseg pages[IVSHMEM_MAX_PAGES];
+	struct rte_ivshmem_metadata_entry *entry;
+	struct memseg_cache_entry * c_entry, * prev_entry;
+	struct ivshmem_config * config;
+	unsigned i, j, mz_iter, ms_iter;
+	uint64_t biggest_len;
+	int biggest_idx;
+
+	/* return error if we try to use an unknown config file */
+	config = get_config_by_name(metadata->name);
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
+		goto fail_e;
+	}
+
+	memset(pages, 0, sizeof(pages));
+
+	e_local = malloc(sizeof(config->metadata->entry));
+	if (e_local == NULL)
+		goto fail_e;
+	ms_local = malloc(sizeof(config->memseg_cache));
+	if (ms_local == NULL)
+		goto fail_ms;
+
+	/* make local copies before doing anything */
+	memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
+	memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));
+
+	qsort(e_local, RTE_DIM(config->metadata->entry),
+			sizeof(struct rte_ivshmem_metadata_entry), entry_compare);
+
+	/* first pass - collect all huge pages */
+	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
+
+		entry = &e_local[mz_iter];
+
+		/* widen the memzone to the hugepage boundaries that enclose it */
+		uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
+				entry->mz.hugepage_sz);
+		uint64_t offset = entry->mz.addr_64 - start_addr;
+		uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
+				entry->mz.hugepage_sz);
+
+		if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
+			continue;
+
+		unsigned start_page = 0;
+
+		/* find first unused page - mz are phys_addr sorted so we don't have to
+		 * look out for holes */
+		for (i = 0; i < RTE_DIM(pages); i++) {
+
+			/* skip if we already have this page */
+			if (pages[i].addr_64 == start_addr) {
+				start_addr += entry->mz.hugepage_sz;
+				len -= entry->mz.hugepage_sz;
+				continue;
+			}
+			/* we found a new page */
+			else if (pages[i].addr_64 == 0) {
+				start_page = i;
+				break;
+			}
+		}
+		if (i == RTE_DIM(pages)) {
+			RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
+			goto fail;
+		}
+
+		/* populate however many pages the memzone has */
+		for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {
+
+			pages[i].addr_64 = start_addr;
+			pages[i].len = entry->mz.hugepage_sz;
+			start_addr += entry->mz.hugepage_sz;
+			len -= entry->mz.hugepage_sz;
+		}
+		/* if there's still length left */
+		if (len != 0) {
+			RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
+			goto fail;
+		}
+	}
+
+	/* second pass - sort pages by size */
+	for (i = 0; i < RTE_DIM(pages); i++) {
+
+		if (pages[i].addr == NULL)
+			break;
+
+		biggest_len = 0;
+		biggest_idx = -1;
+
+		/*
+		 * browse all entries starting at 'i', and find the first
+		 * entry with the largest length
+		 */
+		for (j = i; j < RTE_DIM(pages); j++) {
+			if (pages[j].addr == NULL)
+				break;
+			if (biggest_len == 0 ||
+					pages[j].len > biggest_len) {
+				biggest_len = pages[j].len;
+				biggest_idx = j;
+			}
+		}
+
+		/* should not happen */
+		if (biggest_idx == -1) {
+			RTE_LOG(ERR, EAL, "Error sorting by size!\n");
+			goto fail;
+		}
+		if (i != (unsigned) biggest_idx) {
+			struct rte_memseg tmp;
+
+			memcpy(&tmp, &pages[biggest_idx], sizeof(tmp));
+
+			/* we don't want to break contiguousness, so instead of just
+			 * swapping segments, we shift segments [i, biggest_idx) one
+			 * slot to the right and then put the old segment @ biggest_idx
+			 * in place of segment @ i. A single memmove is used rather
+			 * than a down-counting unsigned index, which would wrap
+			 * around when i == 0. */
+			memmove(&pages[i + 1], &pages[i],
+					((unsigned) biggest_idx - i) * sizeof(pages[0]));
+
+			/* put old biggest segment to its new place */
+			memcpy(&pages[i], &tmp, sizeof(tmp));
+		}
+	}
+
+	/* third pass - write correct offsets */
+	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
+
+		/* running total of the lengths of the pages preceding pages[i] */
+		uint64_t offset = 0;
+
+		entry = &e_local[mz_iter];
+
+		if (entry->mz.addr_64 == 0)
+			break;
+
+		/* find page for current memzone */
+		for (i = 0; i < RTE_DIM(pages); i++) {
+			/* we found our page */
+			if (entry->mz.addr_64 >= pages[i].addr_64 &&
+					entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
+				entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
+						offset;
+				break;
+			}
+			offset += pages[i].len;
+		}
+		if (i == RTE_DIM(pages)) {
+			RTE_LOG(ERR, EAL, "Page not found!\n");
+			goto fail;
+		}
+	}
+
+	ms_iter = 0;
+	prev_entry = NULL;
+
+	/* fourth pass - create proper memseg cache */
+	for (i = 0; i < RTE_DIM(pages); i++) {
+		if (pages[i].addr_64 == 0)
+			break;
+
+		/* never write past the end of the memseg cache copy */
+		if (ms_iter >= RTE_DIM(config->memseg_cache)) {
+			RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
+			goto fail;
+		}
+
+		c_entry = &ms_local[ms_iter];
+		c_entry->len = pages[i].len;
+
+		if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
+			goto fail;
+
+		/* if previous entry has the same filename and is contiguous,
+		 * clear current entry and increase previous entry's length
+		 */
+		if (prev_entry != NULL &&
+				strncmp(c_entry->filepath, prev_entry->filepath,
+				sizeof(c_entry->filepath)) == 0 &&
+				prev_entry->offset + prev_entry->len == c_entry->offset) {
+			prev_entry->len += pages[i].len;
+			memset(c_entry, 0, sizeof(struct memseg_cache_entry));
+		}
+		else {
+			prev_entry = c_entry;
+			ms_iter++;
+		}
+	}
+
+	/* update current configuration with new valid data */
+	memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
+	memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));
+
+	free(ms_local);
+	free(e_local);
+
+	return 0;
+fail:
+	free(ms_local);
+fail_ms:
+	free(e_local);
+fail_e:
+	return -1;
+}
+
+/*
+ * Register memzone 'mz' in the first free slot of 'config' and rebuild the
+ * config's layout. The whole operation runs under config->sl.
+ *
+ * Returns 0 on success. Returns -1 if 'mz' overlaps an already registered
+ * memzone, if build_config() fails, or if every slot is in use. When
+ * build_config() fails the slot is restored to its previous contents.
+ */
+static int
+add_memzone_to_metadata(const struct rte_memzone * mz,
+		struct ivshmem_config * config)
+{
+	struct rte_ivshmem_metadata_entry * entry;
+	struct rte_memzone saved;
+	unsigned i;
+
+	rte_spinlock_lock(&config->sl);
+
+	/* find free slot in this config */
+	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
+		entry = &config->metadata->entry[i];
+
+		/* only slots that hold a memzone can overlap the new one */
+		if (entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
+			RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
+			goto fail;
+		}
+
+		/* if addr is zero, the memzone is probably free */
+		if (entry->mz.addr_64 == 0) {
+			RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
+					mz->name, mz->addr, config->metadata->name);
+
+			/* remember the slot so a failed rebuild can be undone */
+			memcpy(&saved, &entry->mz, sizeof(struct rte_memzone));
+			memcpy(&entry->mz, mz, sizeof(struct rte_memzone));
+
+			/* run config file parser */
+			if (build_config(config->metadata) < 0) {
+				memcpy(&entry->mz, &saved, sizeof(struct rte_memzone));
+				goto fail;
+			}
+
+			break;
+		}
+	}
+
+	/* if we reached the maximum, that means we have no place in config */
+	if (i == RTE_DIM(config->metadata->entry)) {
+		RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
+				config->metadata->name);
+		goto fail;
+	}
+
+	rte_spinlock_unlock(&config->sl);
+	return 0;
+fail:
+	rte_spinlock_unlock(&config->sl);
+	return -1;
+}
+
+/*
+ * Look up the memzone for ring 'r' with get_memzone_by_addr() and register
+ * it in 'config'. Returns -1 when no memzone is found, otherwise the result
+ * of add_memzone_to_metadata().
+ */
+static int
+add_ring_to_metadata(const struct rte_ring * r,
+		struct ivshmem_config * config)
+{
+	struct rte_memzone * zone = get_memzone_by_addr(r);
+
+	if (zone != NULL)
+		return add_memzone_to_metadata(zone, config);
+
+	RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
+	return -1;
+}
+
+/*
+ * Register mempool 'mp' in 'config': first the memzone found for the mempool
+ * itself, then the mempool's ring. Returns -1 if the memzone cannot be found
+ * or cannot be added, otherwise the result of adding the ring.
+ */
+static int
+add_mempool_to_metadata(const struct rte_mempool * mp,
+		struct ivshmem_config * config)
+{
+	struct rte_memzone * zone = get_memzone_by_addr(mp);
+
+	if (zone == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
+		return -1;
+	}
+
+	/* mempool consists of memzone and ring */
+	if (add_memzone_to_metadata(zone, config) < 0)
+		return -1;
+
+	return add_ring_to_metadata(mp->ring, config);
+}
+
+/* Public entry point: add ring 'r' to the metadata file called 'name'.
+ * Returns -1 on NULL arguments or when no such config exists. */
+int
+rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
+{
+	struct ivshmem_config * cfg;
+
+	if (r == NULL || name == NULL)
+		return -1;
+
+	cfg = get_config_by_name(name);
+	if (cfg != NULL)
+		return add_ring_to_metadata(r, cfg);
+
+	RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+	return -1;
+}
+
+/* Public entry point: add memzone 'mz' to the metadata file called 'name'.
+ * Returns -1 on NULL arguments or when no such config exists. */
+int
+rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
+{
+	struct ivshmem_config * cfg;
+
+	if (mz == NULL || name == NULL)
+		return -1;
+
+	cfg = get_config_by_name(name);
+	if (cfg != NULL)
+		return add_memzone_to_metadata(mz, cfg);
+
+	RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+	return -1;
+}
+
+/* Public entry point: add mempool 'mp' to the metadata file called 'name'.
+ * Returns -1 on NULL arguments or when no such config exists. */
+int
+rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
+{
+	struct ivshmem_config * cfg;
+
+	if (mp == NULL || name == NULL)
+		return -1;
+
+	cfg = get_config_by_name(name);
+	if (cfg != NULL)
+		return add_mempool_to_metadata(mp, cfg);
+
+	RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+	return -1;
+}
+
+/* Format the path of the metadata file for config 'name' into 'buffer',
+ * passing 'bufflen' as the size limit, using IVSHMEM_CONFIG_FILE_FMT. */
+static inline void
+ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
+{
+ rte_snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
+}
+
+
+
+/*
+ * Create (or open) the metadata file for config 'name', take the record lock
+ * described by 'lock' on it, resize it and map it shared read/write.
+ *
+ * The 'size' argument is ignored: the file is always resized and mapped with
+ * METADATA_SIZE_ALIGNED bytes.
+ *
+ * Returns the mapping address, or NULL if the file cannot be opened, locked,
+ * resized or mapped. On success the file descriptor is left open.
+ * NOTE(review): presumably deliberate, since closing it would drop the
+ * fcntl() record lock - confirm.
+ */
+static inline
+void *ivshmem_metadata_create(const char *name, size_t size,
+		struct flock *lock)
+{
+	int retval, fd;
+	void *metadata_addr;
+	char pathname[PATH_MAX];
+	struct flock unlock;
+
+	ivshmem_config_path(pathname, sizeof(pathname), name);
+
+	fd = open(pathname, O_RDWR | O_CREAT, 0660);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
+		return NULL;
+	}
+
+	size = METADATA_SIZE_ALIGNED;
+
+	retval = fcntl(fd, F_SETLK, lock);
+	if (retval < 0){
+		close(fd);
+		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
+				"process using it?\n", pathname);
+		return NULL;
+	}
+
+	retval = ftruncate(fd, size);
+	if (retval < 0){
+		close(fd);
+		RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
+		return NULL;
+	}
+
+	metadata_addr = mmap(NULL, size,
+			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (metadata_addr == MAP_FAILED){
+		RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
+
+		/* F_UNLCK is a lock type, not an fcntl command: release the
+		 * region with F_SETLK on a copy so the caller's lock description
+		 * stays untouched. We don't care if we can't unlock. */
+		unlock = *lock;
+		unlock.l_type = F_UNLCK;
+		fcntl(fd, F_SETLK, &unlock);
+		close(fd);
+
+		return NULL;
+	}
+
+	return metadata_addr;
+}
+
+/*
+ * Create a new metadata file called 'name' in the first unused slot of
+ * ivshmem_global_config, under global_cfg_sl. The mapped metadata is zeroed,
+ * stamped with IVSHMEM_MAGIC and given 'name'.
+ * Returns 0 on success, -1 if 'name' is NULL, no slot is free or the file
+ * cannot be created.
+ */
+int rte_ivshmem_metadata_create(const char *name)
+{
+	struct ivshmem_config * slot = NULL;
+	unsigned idx;
+
+	if (pagesz == 0)
+		pagesz = getpagesize();
+
+	if (name == NULL)
+		return -1;
+
+	rte_spinlock_lock(&global_cfg_sl);
+
+	/* a slot without a metadata mapping is unused */
+	for (idx = 0; idx < RTE_DIM(ivshmem_global_config); idx++) {
+		if (ivshmem_global_config[idx].metadata == NULL) {
+			slot = &ivshmem_global_config[idx];
+			break;
+		}
+	}
+
+	if (slot == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
+				"Maximum has been reached\n");
+		rte_spinlock_unlock(&global_cfg_sl);
+		return -1;
+	}
+
+	/* write lock covering the whole aligned metadata area */
+	slot->lock.l_type = F_WRLCK;
+	slot->lock.l_whence = SEEK_SET;
+	slot->lock.l_start = 0;
+	slot->lock.l_len = METADATA_SIZE_ALIGNED;
+
+	slot->metadata = (struct rte_ivshmem_metadata *)
+			ivshmem_metadata_create(name,
+					sizeof(struct rte_ivshmem_metadata),
+					&slot->lock);
+
+	if (slot->metadata == NULL) {
+		rte_spinlock_unlock(&global_cfg_sl);
+		return -1;
+	}
+
+	/* Metadata setup */
+	memset(slot->metadata, 0, sizeof(struct rte_ivshmem_metadata));
+	slot->metadata->magic_number = IVSHMEM_MAGIC;
+	rte_snprintf(slot->metadata->name, sizeof(slot->metadata->name),
+			"%s", name);
+
+	rte_spinlock_unlock(&global_cfg_sl);
+
+	return 0;
+}
+
+/*
+ * Write the QEMU IVSHMEM command line for config 'name' into 'buffer'.
+ *
+ * The line lists one "fd" argument per memseg cache entry, then a /dev/zero
+ * filler that pads the device up to the next power of two, then the metadata
+ * file itself. It is assembled in a local buffer under config->sl and copied
+ * to 'buffer' (of 'size' bytes) behind IVSHMEM_QEMU_CMD_LINE_HEADER_FMT.
+ *
+ * Returns 0 on success, -1 on NULL arguments, unknown config, or when the
+ * command line does not fit in the local buffer or in 'buffer'.
+ */
+int
+rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
+{
+	const struct memseg_cache_entry * ms_cache, *entry;
+	struct ivshmem_config * config;
+	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
+	char cfg_file_path[PATH_MAX];
+	unsigned remaining_len, tmplen, iter;
+	uint64_t shared_mem_size, zero_size, total_size;
+	int ret = -1;
+
+	if (buffer == NULL || name == NULL)
+		return -1;
+
+	config = get_config_by_name(name);
+
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
+		return -1;
+	}
+
+	rte_spinlock_lock(&config->sl);
+
+	/* prepare metadata file path */
+	ivshmem_config_path(cfg_file_path, sizeof(cfg_file_path),
+			config->metadata->name);
+
+	ms_cache = config->memseg_cache;
+
+	cmdline_ptr = cmdline;
+	remaining_len = sizeof(cmdline);
+
+	shared_mem_size = 0;
+
+	/* check the index against the memseg cache before reading the entry */
+	for (iter = 0; iter < RTE_DIM(config->memseg_cache) &&
+			ms_cache[iter].len != 0; iter++) {
+
+		entry = &ms_cache[iter];
+
+		/* Offset and sizes within the current pathname */
+		tmplen = rte_snprintf(cmdline_ptr, remaining_len,
+				IVSHMEM_QEMU_CMD_FD_FMT,
+				entry->filepath, entry->offset, entry->len);
+
+		/* a length that does not leave room for the terminator means the
+		 * output was truncated (or formatting failed) */
+		if (tmplen >= remaining_len) {
+			RTE_LOG(ERR, EAL, "Command line too long!\n");
+			goto out;
+		}
+
+		shared_mem_size += entry->len;
+
+		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+		remaining_len -= tmplen;
+	}
+
+	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
+	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;
+
+	/* add /dev/zero to command-line to fill the space; the numeric
+	 * arguments are passed as 64-bit values like the entries above */
+	tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+			"/dev/zero",
+			(uint64_t) 0x0,
+			zero_size);
+
+	if (tmplen >= remaining_len) {
+		RTE_LOG(ERR, EAL, "Command line too long!\n");
+		goto out;
+	}
+
+	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+	remaining_len -= tmplen;
+
+	/* add metadata file to the end of command-line */
+	tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+			cfg_file_path,
+			(uint64_t) 0x0,
+			(uint64_t) METADATA_SIZE_ALIGNED);
+
+	if (tmplen >= remaining_len) {
+		RTE_LOG(ERR, EAL, "Command line too long!\n");
+		goto out;
+	}
+
+	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+	remaining_len -= tmplen;
+
+	/* if current length of the command line is bigger than the buffer supplied
+	 * by the user, or if command-line is bigger than what IVSHMEM accepts */
+	if ((sizeof(cmdline) - remaining_len) > size) {
+		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
+		goto out;
+	}
+
+	/* complete the command-line; the header adds characters of its own, so
+	 * the result can still be too long for the caller's buffer */
+	tmplen = rte_snprintf(buffer, size,
+			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
+			total_size >> 20,
+			cmdline);
+
+	if (tmplen >= size) {
+		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
+		goto out;
+	}
+
+	ret = 0;
+out:
+	rte_spinlock_unlock(&config->sl);
+
+	return ret;
+}
+
+
+/*
+ * Print every used entry of the metadata file 'name' to stdout, holding
+ * config->sl while doing so. Printing stops at the first entry whose address
+ * is NULL. With RTE_LIBRTE_IVSHMEM_DEBUG defined, the hugepage file and
+ * offset backing each page of the entry are printed as well.
+ * Does nothing if 'name' is NULL; logs an error if the config is unknown.
+ */
+void
+rte_ivshmem_metadata_dump(const char *name)
+{
+	unsigned i = 0;
+	struct ivshmem_config * config;
+	struct rte_ivshmem_metadata_entry *entry;
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+	uint64_t addr;
+	uint64_t end, hugepage_sz;
+	struct memseg_cache_entry e;
+#endif
+
+	if (name == NULL)
+		return;
+
+	/* return error if we try to use an unknown config file */
+	config = get_config_by_name(name);
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+		return;
+	}
+
+	rte_spinlock_lock(&config->sl);
+
+	/* the index is checked before the entry is read, so a completely full
+	 * table is never read one element past its end */
+	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
+
+		entry = &config->metadata->entry[i];
+
+		if (entry->mz.addr == NULL)
+			break;
+
+		printf("Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, "
+			"virt:%-15p, off:0x%-15lx\n",
+			i,
+			entry->mz.name,
+			entry->mz.phys_addr,
+			entry->mz.len,
+			entry->mz.addr,
+			entry->offset);
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+		printf("\tHugepage files:\n");
+
+		/* walk every hugepage that the memzone touches */
+		hugepage_sz = entry->mz.hugepage_sz;
+		addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
+		end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
+				hugepage_sz);
+
+		for (; addr < end; addr += hugepage_sz) {
+			memset(&e, 0, sizeof(e));
+
+			get_hugefile_by_virt_addr(addr, &e);
+
+			printf("\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
+					addr, addr + hugepage_sz, e.offset, e.filepath);
+		}
+#endif
+	}
+
+	rte_spinlock_unlock(&config->sl);
+}
diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h
new file mode 100644
index 0000000..9ff54bb
--- /dev/null
+++ b/lib/librte_ivshmem/rte_ivshmem.h
@@ -0,0 +1,163 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_IVSHMEM_H_
+#define RTE_IVSHMEM_H_
+
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+
+/**
+ * @file
+ *
+ * The RTE IVSHMEM interface provides functions to create metadata files
+ * describing memory segments to be shared via QEMU IVSHMEM.
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IVSHMEM_MAGIC 0x0BADC0DE
+#define IVSHMEM_NAME_LEN 32
+
+/**
+ * Structure that holds IVSHMEM shared metadata entry.
+ *
+ * One entry describes one shared memzone. The library treats an entry whose
+ * memzone address is zero as unused.
+ */
+struct rte_ivshmem_metadata_entry {
+ struct rte_memzone mz; /**< copy of the shared memzone descriptor */
+ uint64_t offset; /**< offset of memzone within IVSHMEM device */
+};
+
+/**
+ * Structure that holds IVSHMEM metadata.
+ *
+ * This is the layout of the metadata file that rte_ivshmem_metadata_create()
+ * maps and initialises.
+ */
+struct rte_ivshmem_metadata {
+ int magic_number; /**< magic number, set to IVSHMEM_MAGIC on creation */
+ char name[IVSHMEM_NAME_LEN]; /**< name of the metadata file */
+ struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES];
+ /**< metadata entries, one per shared memzone */
+};
+
+/**
+ * Creates metadata file with a given name
+ *
+ * @param name
+ * Name of metadata file to be created
+ *
+ * @return
+ * - On success, zero
+ * - On failure, a negative value
+ */
+int rte_ivshmem_metadata_create(const char * name);
+
+/**
+ * Adds memzone to a specific metadata file
+ *
+ * @param mz
+ * Memzone to be added
+ * @param md_name
+ * Name of metadata file for the memzone to be added to
+ *
+ * @return
+ * - On success, zero
+ * - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz,
+ const char * md_name);
+
+/**
+ * Adds a ring descriptor to a specific metadata file
+ *
+ * @param r
+ * Ring descriptor to be added
+ * @param md_name
+ * Name of metadata file for the ring to be added to
+ *
+ * @return
+ * - On success, zero
+ * - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_ring(const struct rte_ring * r,
+ const char * md_name);
+
+/**
+ * Adds a mempool to a specific metadata file
+ *
+ * @param mp
+ * Mempool to be added
+ * @param md_name
+ * Name of metadata file for the mempool to be added to
+ *
+ * @return
+ * - On success, zero
+ * - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp,
+ const char * md_name);
+
+
+/**
+ * Generates the QEMU command-line for IVSHMEM device for a given metadata file.
+ * This function is to be called after all the objects were added.
+ *
+ * @param buffer
+ * Buffer to be filled with the command line arguments.
+ * @param size
+ * Size of the buffer.
+ * @param name
+ * Name of metadata file to generate QEMU command-line parameters for
+ *
+ * @return
+ * - On success, zero
+ * - On failure, a negative value
+ */
+int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
+ const char *name);
+
+
+/**
+ * Dump all metadata entries from a given metadata file to the console.
+ *
+ * @param name
+ * Name of the metadata file to be dumped to console.
+ */
+void rte_ivshmem_metadata_dump(const char *name);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_IVSHMEM_H_ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 89b1c3b..a974dc8 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -64,6 +64,12 @@ LDLIBS += -lrte_kni
endif
endif
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+LDLIBS += -lrte_ivshmem
+endif
+endif
+
ifeq ($(CONFIG_RTE_LIBRTE_E1000_PMD),y)
LDLIBS += -lrte_pmd_e1000
endif