首页 > 编程学习 > DPDK和VPP地址池

DPDK和VPP地址池

发布时间:2022/11/14 5:21:54

如下VPP命令的输出所示,VPP默认创建名称为vpp pool 0的地址池(内存池),其中包括256032个vlib_buffer_t内存结构。

vpp# show dpdk buffer
name="vpp pool 0"  available =  219146 allocated =   36886 total =  256032

以上内存池由函数dpdk_buffer_pools_create创建,其遍历VM的地址池buffer_pools向量,对其中每个已分配内存(start非空)的地址池buffer_pool调用dpdk_buffer_pool_init。在创建DPDK内存池之前,先向DPDK注册两个内存池操作集,名称分别为vpp和vpp-no-cache,之后创建DPDK内存池时会使用到。

/*
 * Register the "vpp" and "vpp-no-cache" mempool ops with DPDK, then run
 * dpdk_buffer_pool_init on every pool in vm->buffer_main->buffer_pools
 * whose start field is non-zero.
 *
 * Returns the first error reported by dpdk_buffer_pool_init.
 */
clib_error_t *
dpdk_buffer_pools_create (vlib_main_t * vm)
{ 
  clib_error_t *err;
  vlib_buffer_pool_t *bp;
  
  struct rte_mempool_ops ops = { };
  
  /* 4 bytes: the three characters of "vpp" plus the terminating NUL */
  strncpy (ops.name, "vpp", 4);
  ops.alloc = dpdk_ops_vpp_alloc;
  ops.free = dpdk_ops_vpp_free;
  ops.get_count = dpdk_ops_vpp_get_count;
  ops.enqueue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_enqueue);
  ops.dequeue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_dequeue);
  /* NOTE(review): the result of rte_mempool_register_ops is not checked */
  rte_mempool_register_ops (&ops);
  
  /*
   * Second ops set: the same struct is reused, so alloc and free keep the
   * values assigned above; only name, get_count, enqueue and dequeue change.
   * 13 bytes: the twelve characters of "vpp-no-cache" plus the NUL.
   */
  strncpy (ops.name, "vpp-no-cache", 13);
  ops.get_count = dpdk_ops_vpp_get_count_no_cache;
  ops.enqueue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_enqueue_no_cache);
  ops.dequeue = dpdk_ops_vpp_dequeue_no_cache;
  rte_mempool_register_ops (&ops);

  /* *INDENT-OFF* */
  /* pools with a zero start field are skipped; stop at the first error */
  vec_foreach (bp, vm->buffer_main->buffer_pools)
    if (bp->start && (err = dpdk_buffer_pool_init (vm, bp)))
      return err;

首先计算pool中元素的大小elt_size,等于DPDK的rte_mbuf的大小,加上VPP的vlib_buffer_t结构的大小,最后加上实际存放报文数据的内存大小data_size,为pool中单个元素的大小。

数组dpdk_mempool_by_buffer_pool_index和dpdk_no_cache_mempool_by_buffer_pool_index用于存放之后创建的DPDK mempool地址。

/*
 * Build the DPDK mempools that back one VPP buffer pool.
 *
 * vm: vlib main; vm->buffer_main->buffer_mem_start is read here.
 * bp: VPP buffer pool; its index, data_size, n_buffers, numa_node and
 *     buffers fields are read by this function.
 *
 * Returns a clib error when either rte_mempool_create_empty call fails.
 */
clib_error_t *
dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
{
  uword buffer_mem_start = vm->buffer_main->buffer_mem_start;
  struct rte_mempool *mp, *nmp;	/* mp: normal pool, nmp: non-cached pool */
  struct rte_pktmbuf_pool_private priv;
  enum rte_iova_mode iova_mode;
  u8 *name = 0;

  /* size of one element: rte_mbuf + vlib_buffer_t + the pool's data_size */
  u32 elt_size =
    sizeof (struct rte_mbuf) + sizeof (vlib_buffer_t) + bp->data_size;

  /* create empty mempools */
  /* both vectors are indexed by bp->index and filled in further below;
     presumably vec_validate_aligned grows them so that slot exists */
  vec_validate_aligned (dpdk_mempool_by_buffer_pool_index, bp->index,
            CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (dpdk_no_cache_mempool_by_buffer_pool_index, bp->index,
            CLIB_CACHE_LINE_BYTES);

创建DPDK内存池结构,elt_size为元素大小,bp->n_buffers为元素数量。对于首个内存池,命名为:vpp pool 0,最后的数字为VPP内存池的索引值。

  /* normal mempool */
  /* the trailing %c with argument 0 appends a NUL byte, so the vector can
     be handed to DPDK as a C string */
  name = format (name, "vpp pool %u%c", bp->index, 0);
  /* bp->n_buffers elements of elt_size bytes; 512 is passed in the
     cache_size position, sizeof (priv) as the private data size */
  mp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
                 elt_size, 512, sizeof (priv),
                 bp->numa_node, 0);
  if (!mp)
    {
      vec_free (name);
      /* NOTE(review): the message says "numa node" but the value printed
         is bp->index, not bp->numa_node - confirm which is intended */
      return clib_error_return (0,
                "failed to create normal mempool for numa node %u",
                bp->index);
    }
  /* name is formatted again for the non-cached pool, so it is reset here
     rather than freed */
  vec_reset_length (name);

接下来创建一个除缓存大小为0之外规格相同的非缓存DPDK内存池,命名为 vpp pool 0 (no cache),其中的数字对应于VPP内存池索引。将以上创建的两个DPDK内存池分别保存到以VPP内存池索引为下标的数组dpdk_mempool_by_buffer_pool_index和dpdk_no_cache_mempool_by_buffer_pool_index中。

  /* non-cached mempool */
  /* same element count, element size and private size as the normal pool,
     but 0 is passed in the cache_size position */
  name = format (name, "vpp pool %u (no cache)%c", bp->index, 0);
  nmp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
                  elt_size, 0, sizeof (priv),
                  bp->numa_node, 0);
  if (!nmp)
    {
      /* release the normal pool created earlier before bailing out */
      rte_mempool_free (mp);
      vec_free (name);
      /* NOTE(review): "nude" in the message looks like a typo for "node",
         and the value printed is bp->index, not bp->numa_node */
      return clib_error_return (0,
                "failed to create non-cache mempool for numa nude %u",
                bp->index);
    }
  vec_free (name);

  /* record both pools under the VPP buffer pool index */
  dpdk_mempool_by_buffer_pool_index[bp->index] = mp;
  dpdk_no_cache_mempool_by_buffer_pool_index[bp->index] = nmp;

为新创建的两个DPDK内存池关联之前注册的操作集。初始化DPDK内存池的私有结构,mbuf_data_room_size表示VPP结构vlib_buffer_t中预留的报文数据长度(VLIB_BUFFER_PRE_DATA_SIZE),与真正存放报文数据的内存大小之和。而mbuf_priv_size(VLIB_BUFFER_HDR_SIZE)表示VPP结构vlib_buffer_t大小除去预留报文数据长度之后的长度。

  /* both DPDK pools carry the VPP buffer pool index as their pool_id */
  mp->pool_id = nmp->pool_id = bp->index;

  /* bind each pool to the ops set of the matching name
     NOTE(review): the return values are not checked */
  rte_mempool_set_ops_byname (mp, "vpp", NULL);
  rte_mempool_set_ops_byname (nmp, "vpp-no-cache", NULL);

  /* Call the mempool priv initializer */
  /* priv is zeroed, then only the two size fields are set; the same priv
     is used for both pools */
  memset (&priv, 0, sizeof (priv));
  priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE +
    vlib_buffer_get_default_data_size (vm);
  priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE;
  rte_pktmbuf_pool_init (mp, &priv);
  rte_pktmbuf_pool_init (nmp, &priv);

接下来初始化DPDK内存池的元素链表,链表元素为rte_mempool_objhdr结构,其中子成员iova保存DPDK结构rte_mbuf的地址(根据IOVA模式,决定保存物理还是虚拟地址)。

|---rte_mempool_objhdr---|----------rte_mbuf----------|------vlib_buffer_t------|------packet_data------|
                         |                            |                         |
                         |                            |---priv.mbuf_priv_size---|-------data_size-------|
                         |
                         |------------------------------------elt_size----------------------------------|

涉及到的三个数据结构内存分布如上所示。

  iova_mode = rte_eal_iova_mode ();

  /* populate mempool object buffer header */
  /* NOTE(review): no declaration of i is visible in this excerpt */
  for (i = 0; i < bp->n_buffers; i++)
    {
      struct rte_mempool_objhdr *hdr;
      vlib_buffer_t *b = vlib_get_buffer (vm, bp->buffers[i]);
      struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b);
      /* the object header sits sizeof (*hdr) bytes before the rte_mbuf */
      hdr = (struct rte_mempool_objhdr *) RTE_PTR_SUB (mb, sizeof (*hdr));
      hdr->mp = mp;
      /* in RTE_IOVA_VA mode the mbuf's virtual address is stored,
         otherwise the address returned by vlib_physmem_get_pa */
      hdr->iova = (iova_mode == RTE_IOVA_VA) ?
    pointer_to_uword (mb) : vlib_physmem_get_pa (vm, mb);
      /* the same hdr, through its single next link, is appended to the
         element list of both pools, and both counters are bumped */
      STAILQ_INSERT_TAIL (&mp->elt_list, hdr, next);
      STAILQ_INSERT_TAIL (&nmp->elt_list, hdr, next);
      mp->populated_size++;
      nmp->populated_size++;
    }
#if RTE_VERSION >= RTE_VERSION_NUM(22, 3, 0, 0)
  /* clear RTE_MEMPOOL_F_NON_IO on mp only; nmp->flags is left as is */
  mp->flags &= ~RTE_MEMPOOL_F_NON_IO;
#endif

遍历DPDK内存池元素,调用rte_pktmbuf_init初始化每个元素(rte_mbuf结构)。接下来,将内存池中第一个buffer对应的rte_mbuf内容拷贝到当前内存池索引对应的DPDK mbuf模板中。最后遍历VPP结构vlib_buffer_t,将其初始化为vlib_buffer模板中的内容(参见vlib_buffer_t结构,模板长度为64字节,即cache line大小)。

  /* call the object initializers */
  /* rte_pktmbuf_init is run over the objects of mp, with 0 as its
     opaque argument */
  rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);

  /* create mbuf header template from the first buffer in the pool */
  vec_validate_aligned (dpdk_mbuf_template_by_pool_index, bp->index,
            CLIB_CACHE_LINE_BYTES);
  /* copies sizeof (struct rte_mbuf) bytes from the rte_mbuf that belongs
     to the first buffer index (*bp->buffers) into the template slot at
     bp->index */
  clib_memcpy (vec_elt_at_index (dpdk_mbuf_template_by_pool_index, bp->index),
           rte_mbuf_from_vlib_buffer (vlib_buffer_ptr_from_index
                      (buffer_mem_start, *bp->buffers,
                       0)), sizeof (struct rte_mbuf));

  /* stamp the pool's buffer_template onto every vlib_buffer_t of the pool */
  for (i = 0; i < bp->n_buffers; i++)
    {
      vlib_buffer_t *b;
      b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
      vlib_buffer_copy_template (b, &bp->buffer_template);
    }

最后,初始化DPDK内存池的mem_list链表,其元素结构为rte_mempool_memhdr,根据VPP内存池的物理内存结构,为每个页面分配rte_mempool_memhdr结构,记录页面的地址等信息。

  /* map DMA pages if at least one physical device exists */
  if (rte_eth_dev_count_avail () || rte_cryptodev_count ())
    {
      uword i;
      size_t page_sz;
      vlib_physmem_map_t *pm;
      /* cleared after the first mapping call that returns non-zero, which
         makes the && below skip the call for the remaining pages */
      int do_vfio_map = 1;

      pm = vlib_physmem_get_map (vm, bp->physmem_map_index);
      page_sz = 1ULL << pm->log2_page_size;

      /* one iteration per page of the physmem map */
      for (i = 0; i < pm->n_pages; i++)
    {
      char *va = ((char *) pm->base) + i * page_sz;
      /* IOVA is the virtual address in RTE_IOVA_VA mode, otherwise the
         page_table entry for this page */
      uword pa = (iova_mode == RTE_IOVA_VA) ?
        pointer_to_uword (va) : pm->page_table[i];

      if (do_vfio_map &&
#if RTE_VERSION < RTE_VERSION_NUM(19, 11, 0, 0)
          rte_vfio_dma_map (pointer_to_uword (va), pa, page_sz))
#else
          rte_vfio_container_dma_map (RTE_VFIO_DEFAULT_CONTAINER_FD,
                      pointer_to_uword (va), pa, page_sz))
#endif
        do_vfio_map = 0;

      /* a memory chunk header is recorded for every page, whether or not
         the DMA mapping above ran or succeeded
         NOTE(review): the clib_mem_alloc result is used without a check */
      struct rte_mempool_memhdr *memhdr;
      memhdr = clib_mem_alloc (sizeof (*memhdr));
      memhdr->mp = mp;
      memhdr->addr = va;
      memhdr->iova = pa;
      memhdr->len = page_sz;
      memhdr->free_cb = 0;
      memhdr->opaque = 0;

      /* only mp's mem_list and chunk counter are updated here */
      STAILQ_INSERT_TAIL (&mp->mem_list, memhdr, next);
      mp->nb_mem_chunks++;
    }
    }
Copyright © 2010-2022 dgrt.cn 版权所有 |关于我们| 联系方式