2348ac893d
DQO is the descriptor format for our next generation virtual NIC. It is necessary to make full use of the hardware bandwidth on many newer GCP VM shapes.

This patch extends the previously introduced DQO descriptor format with a "QPL" mode. QPL stands for Queue Page List and refers to the fact that the hardware cannot access arbitrary regions of the host memory and instead expects a fixed bounce buffer comprising a list of pages.

The QPL aspects are similar to the already existing GQI queue format: the mbufs being input in the Rx path have external storage in the form of vm pages attached to them, and in the Tx path we always copy the mbuf payload into QPL pages.

Signed-off-by: Shailend Chand <shailend@google.com>
Reviewed-by: markj
MFC-after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D46691
1013 lines
26 KiB
C
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}

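/*
 * Allocate the DQO rx descriptor ring, completion ring and buffer tracking
 * array for rx ring i. In QPL mode the ring borrows a pre-registered queue
 * page list from priv->qpls and needs no per-buffer DMA tag; otherwise a DMA
 * tag and one map per buffer are created for cluster mbufs.
 */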
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev, "No QPL left for rx ring %d", i);
			return (ENOMEM);
		}
		return (0);
	}

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

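/*
 * Reset ring state when the queue is (re)enabled: zero both rings and
 * rebuild the buffer lists. In QPL mode a page whose wire count is still
 * above 1 is still lent out to the stack, so it is parked on used_bufs
 * instead of free_bufs until its wire count drops back to 1.
 */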
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous innings.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

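/*
 * Move the buffer-posting head forward by one descriptor. The doorbell is
 * only rung every GVE_RX_BUF_THRESH_DQO descriptors so that device writes
 * are batched.
 */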
static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}

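/*
 * Non-QPL posting path: take a buffer off free_bufs, attach a freshly
 * allocated cluster mbuf, DMA-load it and hand the resulting address to the
 * NIC. On failure the buffer is returned to free_bufs and the corresponding
 * stat counter is bumped.
 */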
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = MCLBYTES;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}

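/*
 * QPL posting path: each QPL page is carved into GVE_DQ_NUM_FRAGS_IN_PAGE
 * fragments of GVE_DEFAULT_RX_BUFFER_SIZE bytes. The buffer id written to
 * the descriptor encodes both the page index and the fragment number within
 * that page.
 */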
static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;

	composed_id.buf_id = buf - rx->dqo.bufs;
	composed_id.frag_num = frag_num;
	desc->buf_id = htole16(composed_id.all);

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_PREREAD);
	desc->buf_addr = htole64(page_dma_handle->bus_addr +
	    frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	buf->num_nic_frags++;
	gve_rx_advance_head_dqo(rx);
}

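/*
 * Walk used_bufs and move pages whose wire count has dropped back to 1 (that
 * is, pages the stack is no longer holding) onto free_bufs. At most one
 * still-referenced head-of-line blocker is skipped over and reinserted at
 * the head so that the list is not scanned past it.
 */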
static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}

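/*
 * Post the next unused fragment of a QPL page. A page only leaves free_bufs
 * once all of its fragments have been handed to the NIC; it then sits on
 * used_bufs until every completion has arrived and the stack has released
 * its references.
 */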
static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}

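/*
 * Translate the checksum bits in the completion descriptor into mbuf
 * csum_flags. Nothing is marked valid unless the hardware reports that it
 * parsed the L3 and L4 headers and found no checksum errors.
 */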
static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
		    CSUM_IP_VALID |
		    CSUM_DATA_VALID |
		    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

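/*
 * Copybreak: for small, single-fragment packets it is cheaper to copy the
 * payload into a fresh mbuf than to give up the receive buffer, letting the
 * caller repost the original buffer to the NIC immediately.
 */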
static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}

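/*
 * Process one completion descriptor in non-QPL mode: validate the buffer id,
 * detach the completed mbuf into the per-ring packet context (chaining
 * multi-fragment packets), try to post a replacement buffer, and hand the
 * packet to the stack on the end-of-packet fragment. Bad descriptors trigger
 * a reset; allocation failures fall back to dropping the packet so its
 * buffer can be reused.
 */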
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
	return (va);
}

static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

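/*
 * Zero-copy path for QPL completions: attach the QPL page fragment to an
 * mbuf as external storage via MEXTADD, with gve_mextadd_free as the
 * ext-free callback. An extra wire reference is taken on the page so that
 * the callback cannot free the page out from under the interface.
 */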
static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}

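/*
 * QPL counterpart of gve_rx_dqo(): the completion carries a composed buffer
 * id (page index plus fragment number). Fragments are normally attached to
 * the packet context zero-copy; if a replacement buffer cannot be posted and
 * the number of outstanding buffers is low, the fragment is copied into a
 * cluster mbuf instead so its QPL buffer can be reposted right away.
 */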
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both the counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

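/*
 * Drain up to budget completion descriptors. A descriptor is valid only when
 * its generation bit differs from the ring's current generation; the
 * generation flips each time the tail wraps. Returns true if the budget was
 * exhausted so the caller can reschedule itself.
 */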
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (compl_desc->generation == rx->dqo.cur_gen_bit)
			break;
		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		rmb();

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}