summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    Anatoly Burakov <anatoly.burakov@intel.com>  2019-11-05 15:15:12 +0000
committer David Marchand <david.marchand@redhat.com>  2019-11-07 17:46:43 +0100
commit    47c45a4df687a1bf1f0ae186ebe555c883b446f2 (patch)
tree      6b1e518d5f958c81e085d4f7535a9db7de670a73
parent    b14d192ca1fc7575f415f363990c202cece84e28 (diff)
download  dpdk-47c45a4df687a1bf1f0ae186ebe555c883b446f2.zip
          dpdk-47c45a4df687a1bf1f0ae186ebe555c883b446f2.tar.gz
          dpdk-47c45a4df687a1bf1f0ae186ebe555c883b446f2.tar.xz
vfio: fix DMA mapping of external heaps
Currently, externally created heaps are supposed to be automatically mapped for VFIO DMA by EAL; however, they only do so if, at the time of heap creation, VFIO is initialized and has at least one device available. If no devices are available at the time of heap creation (or if devices were available, but were since hot-unplugged, thus dropping all VFIO container mappings), then VFIO mapping code would have skipped over externally allocated heaps.

The fix is two-fold. First, we allow externally allocated memory segments to be marked as "heap" segments. This allows us to distinguish between external memory segments that were created via the heap API and those that were created via the rte_extmem_register() API.

Then, we fix the VFIO code to only skip non-heap external segments. Also, since external heaps are not guaranteed to have valid IOVA addresses, we will skip those which have invalid IOVA addresses as well.

Fixes: 0f526d674f8e ("malloc: separate creating memseg list and malloc heap")
Cc: stable@dpdk.org

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Rajesh Ravi <rajesh.ravi@broadcom.com>
Acked-by: David Marchand <david.marchand@redhat.com>
-rw-r--r--lib/librte_eal/common/include/rte_memory.h1
-rw-r--r--lib/librte_eal/common/rte_malloc.c1
-rw-r--r--lib/librte_eal/freebsd/eal/eal_memory.c1
-rw-r--r--lib/librte_eal/linux/eal/eal_memory.c3
-rw-r--r--lib/librte_eal/linux/eal/eal_vfio.c45
5 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 38e00e3..bf81a2f 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -81,6 +81,7 @@ struct rte_memseg_list {
volatile uint32_t version; /**< version number for multiprocess sync. */
size_t len; /**< Length of memory area covered by this memseg list. */
unsigned int external; /**< 1 if this list points to external memory */
+ unsigned int heap; /**< 1 if this list points to a heap */
struct rte_fbarray memseg_arr;
};
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 044d3a9..413e4aa 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -396,6 +396,7 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
rte_spinlock_lock(&heap->lock);
ret = malloc_heap_add_external_memory(heap, msl);
+ msl->heap = 1; /* mark it as heap segment */
rte_spinlock_unlock(&heap->lock);
unlock:
diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c
index 7fe3178..a97d8f0 100644
--- a/lib/librte_eal/freebsd/eal/eal_memory.c
+++ b/lib/librte_eal/freebsd/eal/eal_memory.c
@@ -93,6 +93,7 @@ rte_eal_hugepage_init(void)
msl->page_sz = page_sz;
msl->len = internal_config.memory;
msl->socket_id = 0;
+ msl->heap = 1;
/* populate memsegs. each memseg is 1 page long */
for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c
index accfd2e..43e4ffc 100644
--- a/lib/librte_eal/linux/eal/eal_memory.c
+++ b/lib/librte_eal/linux/eal/eal_memory.c
@@ -831,6 +831,7 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
msl->page_sz = page_sz;
msl->socket_id = socket_id;
msl->base_va = NULL;
+ msl->heap = 1; /* mark it as a heap segment */
RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
(size_t)page_sz >> 10, socket_id);
@@ -1405,6 +1406,7 @@ eal_legacy_hugepage_init(void)
msl->page_sz = page_sz;
msl->socket_id = 0;
msl->len = internal_config.memory;
+ msl->heap = 1;
/* we're in single-file segments mode, so only the segment list
* fd needs to be set up.
@@ -1677,6 +1679,7 @@ eal_legacy_hugepage_init(void)
mem_sz = msl->len;
munmap(msl->base_va, mem_sz);
msl->base_va = NULL;
+ msl->heap = 0;
/* destroy backing fbarray */
rte_fbarray_destroy(&msl->memseg_arr);
diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index 58c0f85..28e10cd 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -1253,7 +1253,16 @@ type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
{
int *vfio_container_fd = arg;
- if (msl->external)
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ /* if IOVA mode is VA, we've already mapped the internal segments */
+ if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
return 0;
return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
@@ -1320,8 +1329,13 @@ vfio_type1_dma_map(int vfio_container_fd)
/* with IOVA as VA mode, we can get away with mapping contiguous
* chunks rather than going page-by-page.
*/
- return rte_memseg_contig_walk(type1_map_contig,
+ int ret = rte_memseg_contig_walk(type1_map_contig,
&vfio_container_fd);
+ if (ret)
+ return ret;
+ /* we have to continue the walk because we've skipped the
+ * external segments during the config walk.
+ */
}
return rte_memseg_walk(type1_map, &vfio_container_fd);
}
@@ -1413,7 +1427,15 @@ vfio_spapr_map_walk(const struct rte_memseg_list *msl,
{
struct spapr_remap_walk_param *param = arg;
- if (msl->external || ms->addr_64 == param->addr_64)
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ if (ms->addr_64 == param->addr_64)
return 0;
return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
@@ -1426,7 +1448,15 @@ vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
{
struct spapr_remap_walk_param *param = arg;
- if (msl->external || ms->addr_64 == param->addr_64)
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ if (ms->addr_64 == param->addr_64)
return 0;
return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
@@ -1446,7 +1476,12 @@ vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
struct spapr_walk_param *param = arg;
uint64_t max = ms->iova + ms->len;
- if (msl->external)
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
return 0;
/* do not iterate ms we haven't mapped yet */