Merge branch 'freebsd/current/main' into hardened/current/master

HardenedBSD Sync Services 2024-11-06 12:01:24 -07:00
commit 4f4bed81b8
No known key found for this signature in database
21 changed files with 3505 additions and 190 deletions


@ -1,6 +1,6 @@
.\" SPDX-License-Identifier: BSD-3-Clause
.\"
.\" Copyright (c) 2023 Google LLC
.\" Copyright (c) 2023-2024 Google LLC
.\"
.\" Redistribution and use in source and binary forms, with or without modification,
.\" are permitted provided that the following conditions are met:
@ -26,7 +26,7 @@
.\" ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
.\" SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.Dd April 26, 2023
.Dd October 14, 2024
.Dt GVE 4
.Os
.Sh NAME
@ -192,16 +192,63 @@ These two messages correspond to the NIC alerting the driver to link state chang
.Pp
Apart from these messages, the driver exposes per-queue packet and error counters as sysctl nodes.
Global (across queues) counters can be read using
.Xr netstat 8 .
.Xr netstat 1 .
.Sh SYSCTL VARIABLES
.Nm
exposes the following
.Xr sysctl 8
variables:
.Bl -tag -width indent
.It Va hw.gve.driver_version
The driver version.
This is read-only.
.It Va hw.gve.queue_format
The queue format in use.
This is read-only.
.It Va hw.gve.disable_hw_lro
Setting this boot-time tunable to 1 disables Large Receive Offload (LRO) in the NIC.
The default value is 0, which means hardware LRO is enabled by default.
The software LRO stack in the kernel is always used.
This sysctl variable needs to be set before loading the driver, using
.Xr loader.conf 5 .
.El
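The two read-only nodes above can also be queried programmatically; a minimal userspace sketch using sysctlbyname(3) follows (the shell equivalent is "sysctl hw.gve.queue_format"). The boot-time tunable is instead set from loader.conf(5), e.g. hw.gve.disable_hw_lro=1.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char buf[32];
	size_t len;

	len = sizeof(buf);
	if (sysctlbyname("hw.gve.driver_version", buf, &len, NULL, 0) == 0)
		printf("driver version: %s\n", buf);

	len = sizeof(buf);
	if (sysctlbyname("hw.gve.queue_format", buf, &len, NULL, 0) == 0)
		printf("queue format: %s\n", buf);

	return (0);
}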
.Sh LIMITATIONS
.Nm
does not support the transmission of VLAN-tagged packets.
All VLAN-tagged traffic is dropped.
.Sh QUEUE FORMATS
.Nm
features different datapath modes called queue formats:
.Pp
.Bl -bullet -compact
.It
GQI_QPL: "QPL" stands for "Queue Page List" and refers to the fact that
hardware expects a fixed bounce buffer and cannot access arbitrary memory.
GQI is the older descriptor format.
The G in "GQI" refers to an older generation of hardware, and the "QI"
stands for "Queue In-order" referring to the fact that the NIC sends
Tx and Rx completions in the same order as the one in which the corresponding
descriptors were posted by the driver.
.It
DQO_RDA: DQO is the descriptor format required to take full advantage of
next generation VM shapes.
"RDA" stands for "Raw DMA Addressing" and refers to the fact that hardware
can work with DMA-ed packets and does not expect them to be copied into or
out of a fixed bounce buffer.
The D in "DQO" refers to a newer generation of hardware, and the "QO"
stands for "Queue Out-of-order" referring to the fact that the NIC might
send Tx and Rx completions in an order different from the one in which
the corresponding descriptors were posted by the driver.
.It
DQO_QPL: The next generation descriptor format in the "QPL" mode.
.El
.Sh SUPPORT
Please email gvnic-drivers@google.com with the specifics of the issue encountered.
.Sh SEE ALSO
.Xr netstat 1 ,
.Xr loader.conf 5 ,
.Xr ifconfig 8 ,
.Xr netstat 8
.Xr sysctl 8
.Sh HISTORY
The
.Nm


@ -1732,8 +1732,10 @@ dev/gve/gve_adminq.c optional gve
dev/gve/gve_main.c optional gve
dev/gve/gve_qpl.c optional gve
dev/gve/gve_rx.c optional gve
dev/gve/gve_rx_dqo.c optional gve
dev/gve/gve_sysctl.c optional gve
dev/gve/gve_tx.c optional gve
dev/gve/gve_tx_dqo.c optional gve
dev/gve/gve_utils.c optional gve
dev/goldfish/goldfish_rtc.c optional goldfish_rtc fdt
dev/gpio/acpi_gpiobus.c optional acpi gpio


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -53,6 +53,9 @@
/* Each RX bounce buffer page can fit two packet buffers. */
#define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2)
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024
/*
* Number of descriptors per queue page list.
* Page count AKA QPL size can be derived by dividing the number of elements in
@ -102,6 +105,7 @@ enum gve_queue_format {
GVE_GQI_RDA_FORMAT = 0x1,
GVE_GQI_QPL_FORMAT = 0x2,
GVE_DQO_RDA_FORMAT = 0x3,
GVE_DQO_QPL_FORMAT = 0x4,
};
enum gve_state_flags_bit {
@ -223,31 +227,93 @@ struct gve_rxq_stats {
counter_u64_t rx_frag_flip_cnt;
counter_u64_t rx_frag_copy_cnt;
counter_u64_t rx_dropped_pkt_desc_err;
counter_u64_t rx_dropped_pkt_buf_post_fail;
counter_u64_t rx_dropped_pkt_mbuf_alloc_fail;
counter_u64_t rx_mbuf_dmamap_err;
counter_u64_t rx_mbuf_mclget_null;
};
#define NUM_RX_STATS (sizeof(struct gve_rxq_stats) / sizeof(counter_u64_t))
union gve_rx_qpl_buf_id_dqo {
struct {
uint16_t buf_id:11; /* Index into rx->dqo.bufs */
uint8_t frag_num:5; /* Which frag in the QPL page */
};
uint16_t all;
} __packed;
_Static_assert(sizeof(union gve_rx_qpl_buf_id_dqo) == 2,
"gve: bad dqo qpl rx buf id length");
struct gve_rx_buf_dqo {
union {
/* RDA */
struct {
struct mbuf *mbuf;
bus_dmamap_t dmamap;
uint64_t addr;
bool mapped;
};
/* QPL */
struct {
uint8_t num_nic_frags; /* number of pending completions */
uint8_t next_idx; /* index of the next frag to post */
/* for chaining rx->dqo.used_bufs */
STAILQ_ENTRY(gve_rx_buf_dqo) stailq_entry;
};
};
/* for chaining rx->dqo.free_bufs */
SLIST_ENTRY(gve_rx_buf_dqo) slist_entry;
};
/* power-of-2 sized receive ring */
struct gve_rx_ring {
struct gve_ring_com com;
struct gve_dma_handle desc_ring_mem;
struct gve_dma_handle data_ring_mem;
uint32_t cnt; /* free-running total number of completed packets */
uint32_t fill_cnt; /* free-running total number of descs and buffs posted */
/* accessed in the receive hot path */
struct {
struct gve_rx_desc *desc_ring;
union gve_rx_data_slot *data_ring;
struct gve_rx_slot_page_info *page_info;
union {
/* GQI-only fields */
struct {
struct gve_dma_handle data_ring_mem;
struct gve_rx_ctx ctx;
struct lro_ctrl lro;
uint8_t seq_no; /* helps traverse the descriptor ring */
uint32_t cnt; /* free-running total number of completed packets */
uint32_t fill_cnt; /* free-running total number of descs and buffs posted */
uint32_t mask; /* masks the cnt and fill_cnt to the size of the ring */
struct gve_rxq_stats stats;
} __aligned(CACHE_LINE_SIZE);
/* accessed in the GQ receive hot path */
struct gve_rx_desc *desc_ring;
union gve_rx_data_slot *data_ring;
struct gve_rx_slot_page_info *page_info;
uint32_t mask; /* masks the cnt and fill_cnt to the size of the ring */
uint8_t seq_no; /* helps traverse the descriptor ring */
};
/* DQO-only fields */
struct {
struct gve_dma_handle compl_ring_mem;
struct gve_rx_compl_desc_dqo *compl_ring;
struct gve_rx_desc_dqo *desc_ring;
struct gve_rx_buf_dqo *bufs; /* Parking place for posted buffers */
bus_dma_tag_t buf_dmatag; /* To dmamap posted mbufs with */
uint32_t buf_cnt; /* Size of the bufs array */
uint32_t mask; /* One less than the sizes of the desc and compl rings */
uint32_t head; /* The index at which to post the next buffer at */
uint32_t tail; /* The index at which to receive the next compl at */
uint8_t cur_gen_bit; /* Gets flipped on every cycle of the compl ring */
SLIST_HEAD(, gve_rx_buf_dqo) free_bufs;
/*
* Only used in QPL mode. Pages referred to by if_input-ed mbufs
* stay parked here till their wire count comes back to 1.
* Pages are moved here after there aren't any pending completions.
*/
STAILQ_HEAD(, gve_rx_buf_dqo) used_bufs;
} dqo;
};
struct lro_ctrl lro;
struct gve_rx_ctx ctx;
struct gve_rxq_stats stats;
} __aligned(CACHE_LINE_SIZE);
@ -275,13 +341,41 @@ struct gve_txq_stats {
counter_u64_t tpackets;
counter_u64_t tso_packet_cnt;
counter_u64_t tx_dropped_pkt;
counter_u64_t tx_dropped_pkt_nospace_device;
counter_u64_t tx_delayed_pkt_nospace_device;
counter_u64_t tx_dropped_pkt_nospace_bufring;
counter_u64_t tx_delayed_pkt_nospace_descring;
counter_u64_t tx_delayed_pkt_nospace_compring;
counter_u64_t tx_delayed_pkt_nospace_qpl_bufs;
counter_u64_t tx_delayed_pkt_tsoerr;
counter_u64_t tx_dropped_pkt_vlan;
counter_u64_t tx_mbuf_collapse;
counter_u64_t tx_mbuf_defrag;
counter_u64_t tx_mbuf_defrag_err;
counter_u64_t tx_mbuf_dmamap_enomem_err;
counter_u64_t tx_mbuf_dmamap_err;
};
#define NUM_TX_STATS (sizeof(struct gve_txq_stats) / sizeof(counter_u64_t))
struct gve_tx_pending_pkt_dqo {
struct mbuf *mbuf;
union {
/* RDA */
bus_dmamap_t dmamap;
/* QPL */
struct {
/*
* A linked list of entries from qpl_bufs that served
* as the bounce buffer for this packet.
*/
int32_t qpl_buf_head;
uint32_t num_qpl_bufs;
};
};
uint8_t state; /* the gve_packet_state enum */
int next; /* To chain the free_pending_pkts lists */
};
/* power-of-2 sized transmit ring */
struct gve_tx_ring {
struct gve_ring_com com;
@ -289,24 +383,133 @@ struct gve_tx_ring {
struct task xmit_task;
struct taskqueue *xmit_tq;
bool stopped;
/* accessed in the transmit hot path */
struct {
union gve_tx_desc *desc_ring;
struct gve_tx_buffer_state *info;
struct buf_ring *br;
/* Accessed when writing descriptors */
struct buf_ring *br;
struct mtx ring_mtx;
struct gve_tx_fifo fifo;
struct mtx ring_mtx;
uint32_t req; /* free-running total number of packets written to the nic */
uint32_t done; /* free-running total number of completed packets */
uint32_t req; /* free-running total number of packets written to the nic */
uint32_t done; /* free-running total number of completed packets */
uint32_t mask; /* masks the req and done to the size of the ring */
struct gve_txq_stats stats;
} __aligned(CACHE_LINE_SIZE);
union {
/* GQI specific stuff */
struct {
union gve_tx_desc *desc_ring;
struct gve_tx_buffer_state *info;
struct gve_tx_fifo fifo;
uint32_t mask; /* masks the req and done to the size of the ring */
};
/* DQO specific stuff */
struct {
struct gve_dma_handle compl_ring_mem;
/* Accessed when writing descriptors */
struct {
union gve_tx_desc_dqo *desc_ring;
uint32_t desc_mask; /* masks head and tail to the size of desc_ring */
uint32_t desc_head; /* last desc read by NIC, cached value of hw_tx_head */
uint32_t desc_tail; /* last desc written by driver */
uint32_t last_re_idx; /* desc which last had "report event" set */
/*
* The head index of a singly linked list containing pending packet objects
* to park mbufs till the NIC sends completions. Once this list is depleted,
* the "_prd" suffixed producer list, grown by the completion taskqueue,
* is stolen.
*/
int32_t free_pending_pkts_csm;
/*
* The head index of a singly linked list representing QPL page fragments
* to copy mbuf payload into for the NIC to see. Once this list is depleted,
* the "_prd" suffixed producer list, grown by the completion taskqueue,
* is stolen.
*
* Only used in QPL mode. int32_t because atomic_swap_16 doesn't exist.
*/
int32_t free_qpl_bufs_csm;
uint32_t qpl_bufs_consumed; /* Allows quickly checking for buf availability */
uint32_t qpl_bufs_produced_cached; /* Cached value of qpl_bufs_produced */
/* DMA params for mapping Tx mbufs. Only used in RDA mode. */
bus_dma_tag_t buf_dmatag;
} __aligned(CACHE_LINE_SIZE);
/* Accessed when processing completions */
struct {
struct gve_tx_compl_desc_dqo *compl_ring;
uint32_t compl_mask; /* masks head to the size of compl_ring */
uint32_t compl_head; /* last completion read by driver */
uint8_t cur_gen_bit; /* NIC flips a bit on every pass */
uint32_t hw_tx_head; /* last desc read by NIC */
/*
* The completion taskqueue moves pending-packet objects to this
* list after freeing the mbuf. The "_prd" denotes that this is
* a producer list. The transmit taskqueue steals this list once
* its consumer list, with the "_csm" suffix, is depleted.
*/
int32_t free_pending_pkts_prd;
/*
* The completion taskqueue moves the QPL pages corresponding to a
* completed packet into this list. It is only used in QPL mode.
* The "_prd" denotes that this is a producer list. The transmit
* taskqueue steals this list once its consumer list, with the "_csm"
* suffix, is depleted.
*
* Only used in QPL mode. int32_t because atomic_swap_16 doesn't exist.
*/
int32_t free_qpl_bufs_prd;
uint32_t qpl_bufs_produced;
} __aligned(CACHE_LINE_SIZE);
/* Accessed by both the completion and xmit loops */
struct {
/* completion tags index into this array */
struct gve_tx_pending_pkt_dqo *pending_pkts;
uint16_t num_pending_pkts;
/*
* Represents QPL page fragments. An index into this array
* always represents the same QPL page fragment. The value
* is also an index into this array and serves as a means
* to chain buffers into linked lists whose heads are
* either free_qpl_bufs_prd or free_qpl_bufs_csm or
* qpl_bufs_head.
*/
int32_t *qpl_bufs;
} __aligned(CACHE_LINE_SIZE);
} dqo;
};
struct gve_txq_stats stats;
} __aligned(CACHE_LINE_SIZE);
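The "_csm"/"_prd" comments above describe a lock-free handoff: the transmit path consumes from its private "_csm" list and, only when that runs dry, steals the whole "_prd" list grown by the completion taskqueue with a single atomic swap. A hedged sketch of that pattern using the fields declared above (the helper name and the -1 empty-list sentinel are assumptions, not the driver's exact code):

static int32_t
gve_tx_pop_free_pending_pkt_sketch(struct gve_tx_ring *tx)
{
	int32_t idx = tx->dqo.free_pending_pkts_csm;

	if (idx == -1) {
		/* Consumer list depleted: steal the producer list atomically. */
		idx = (int32_t)atomic_swap_32(
		    (volatile uint32_t *)&tx->dqo.free_pending_pkts_prd,
		    (uint32_t)-1);
		if (idx == -1)
			return (-1);	/* nothing free anywhere */
	}

	/* Pop the head; "next" chains the singly linked free list. */
	tx->dqo.free_pending_pkts_csm = tx->dqo.pending_pkts[idx].next;
	return (idx);
}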
enum gve_packet_state {
/*
* Packet does not yet have a dmamap created.
* This should always be zero since state is not explicitly initialized.
*/
GVE_PACKET_STATE_UNALLOCATED,
/* Packet has a dmamap and is in free list, available to be allocated. */
GVE_PACKET_STATE_FREE,
/* Packet is expecting a regular data completion */
GVE_PACKET_STATE_PENDING_DATA_COMPL,
};
struct gve_ptype {
uint8_t l3_type; /* `gve_l3_type` in gve_adminq.h */
uint8_t l4_type; /* `gve_l4_type` in gve_adminq.h */
};
struct gve_ptype_lut {
struct gve_ptype ptypes[GVE_NUM_PTYPES];
};
struct gve_priv {
if_t ifp;
device_t dev;
@ -348,6 +551,8 @@ struct gve_priv {
struct gve_tx_ring *tx;
struct gve_rx_ring *rx;
struct gve_ptype_lut *ptype_lut_dqo;
/*
* Admin queue - see gve_adminq.h
* Since AQ cmds do not run in steady state, 32 bit counters suffice
@ -370,6 +575,7 @@ struct gve_priv {
uint32_t adminq_dcfg_device_resources_cnt;
uint32_t adminq_set_driver_parameter_cnt;
uint32_t adminq_verify_driver_compatibility_cnt;
uint32_t adminq_get_ptype_map_cnt;
uint32_t interface_up_cnt;
uint32_t interface_down_cnt;
@ -400,6 +606,19 @@ gve_clear_state_flag(struct gve_priv *priv, int pos)
BIT_CLR_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags);
}
static inline bool
gve_is_gqi(struct gve_priv *priv)
{
return (priv->queue_format == GVE_GQI_QPL_FORMAT);
}
static inline bool
gve_is_qpl(struct gve_priv *priv)
{
return (priv->queue_format == GVE_GQI_QPL_FORMAT ||
priv->queue_format == GVE_DQO_QPL_FORMAT);
}
/* Defined in gve_main.c */
void gve_schedule_reset(struct gve_priv *priv);
@ -407,12 +626,14 @@ void gve_schedule_reset(struct gve_priv *priv);
uint32_t gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset);
void gve_reg_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
void gve_db_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
void gve_db_bar_dqo_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val);
/* QPL (Queue Page List) functions defined in gve_qpl.c */
int gve_alloc_qpls(struct gve_priv *priv);
void gve_free_qpls(struct gve_priv *priv);
int gve_register_qpls(struct gve_priv *priv);
int gve_unregister_qpls(struct gve_priv *priv);
void gve_mextadd_free(struct mbuf *mbuf);
/* TX functions defined in gve_tx.c */
int gve_alloc_tx_rings(struct gve_priv *priv);
@ -425,6 +646,15 @@ void gve_qflush(if_t ifp);
void gve_xmit_tq(void *arg, int pending);
void gve_tx_cleanup_tq(void *arg, int pending);
/* TX functions defined in gve_tx_dqo.c */
int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i);
void gve_tx_free_ring_dqo(struct gve_priv *priv, int i);
void gve_clear_tx_ring_dqo(struct gve_priv *priv, int i);
int gve_tx_intr_dqo(void *arg);
int gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr);
int gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf);
void gve_tx_cleanup_tq_dqo(void *arg, int pending);
/* RX functions defined in gve_rx.c */
int gve_alloc_rx_rings(struct gve_priv *priv);
void gve_free_rx_rings(struct gve_priv *priv);
@ -433,6 +663,14 @@ int gve_destroy_rx_rings(struct gve_priv *priv);
int gve_rx_intr(void *arg);
void gve_rx_cleanup_tq(void *arg, int pending);
/* RX functions defined in gve_rx_dqo.c */
int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i);
void gve_rx_free_ring_dqo(struct gve_priv *priv, int i);
void gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx);
void gve_clear_rx_ring_dqo(struct gve_priv *priv, int i);
int gve_rx_intr_dqo(void *arg);
void gve_rx_cleanup_tq_dqo(void *arg, int pending);
/* DMA functions defined in gve_utils.c */
int gve_dma_alloc_coherent(struct gve_priv *priv, int size, int align,
struct gve_dma_handle *dma);
@ -447,7 +685,10 @@ int gve_alloc_irqs(struct gve_priv *priv);
void gve_unmask_all_queue_irqs(struct gve_priv *priv);
void gve_mask_all_queue_irqs(struct gve_priv *priv);
/* Systcl functions defined in gve_sysctl.c*/
/* Systcl functions defined in gve_sysctl.c */
extern bool gve_disable_hw_lro;
extern char gve_queue_format[8];
extern char gve_version[8];
void gve_setup_sysctl(struct gve_priv *priv);
void gve_accum_stats(struct gve_priv *priv, uint64_t *rpackets,
uint64_t *rbytes, uint64_t *rx_dropped_pkt, uint64_t *tpackets,


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -57,6 +57,8 @@ void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_descriptor *device_descriptor,
struct gve_device_option *option,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
{
uint32_t req_feat_mask = be32toh(option->required_features_mask);
@ -85,6 +87,40 @@ void gve_parse_device_option(struct gve_priv *priv,
*dev_op_gqi_qpl = (void *)(option + 1);
break;
case GVE_DEV_OPT_ID_DQO_RDA:
if (option_length < sizeof(**dev_op_dqo_rda) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) {
device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
"DQO RDA", (int)sizeof(**dev_op_dqo_rda),
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA,
option_length, req_feat_mask);
break;
}
if (option_length > sizeof(**dev_op_dqo_rda)) {
device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
"DQO RDA");
}
*dev_op_dqo_rda = (void *)(option + 1);
break;
case GVE_DEV_OPT_ID_DQO_QPL:
if (option_length < sizeof(**dev_op_dqo_qpl) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
"DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
option_length, req_feat_mask);
break;
}
if (option_length > sizeof(**dev_op_dqo_qpl)) {
device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
"DQO QPL");
}
*dev_op_dqo_qpl = (void *)(option + 1);
break;
case GVE_DEV_OPT_ID_JUMBO_FRAMES:
if (option_length < sizeof(**dev_op_jumbo_frames) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@ -117,6 +153,8 @@ static int
gve_process_device_options(struct gve_priv *priv,
struct gve_device_descriptor *descriptor,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
{
char *desc_end = (char *)descriptor + be16toh(descriptor->total_length);
@ -130,12 +168,15 @@ gve_process_device_options(struct gve_priv *priv,
if ((char *)(dev_opt + 1) > desc_end ||
(char *)(dev_opt + 1) + be16toh(dev_opt->option_length) > desc_end) {
device_printf(priv->dev,
"options exceed device_descriptor's total length.\n");
"options exceed device descriptor's total length.\n");
return (EINVAL);
}
gve_parse_device_option(priv, descriptor, dev_opt,
dev_op_gqi_qpl, dev_op_jumbo_frames);
dev_op_gqi_qpl,
dev_op_dqo_rda,
dev_op_dqo_qpl,
dev_op_jumbo_frames);
dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length));
}
@ -221,16 +262,35 @@ gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index)
cmd.opcode = htobe32(GVE_ADMINQ_CREATE_RX_QUEUE);
cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
.queue_id = htobe32(queue_index),
.index = htobe32(queue_index),
.ntfy_id = htobe32(rx->com.ntfy_id),
.queue_resources_addr = htobe64(qres_dma->bus_addr),
.rx_desc_ring_addr = htobe64(rx->desc_ring_mem.bus_addr),
.rx_data_ring_addr = htobe64(rx->data_ring_mem.bus_addr),
.queue_page_list_id = htobe32((rx->com.qpl)->id),
.rx_ring_size = htobe16(priv->rx_desc_cnt),
.packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE),
};
if (gve_is_gqi(priv)) {
cmd.create_rx_queue.rx_desc_ring_addr =
htobe64(rx->desc_ring_mem.bus_addr);
cmd.create_rx_queue.rx_data_ring_addr =
htobe64(rx->data_ring_mem.bus_addr);
cmd.create_rx_queue.index =
htobe32(queue_index);
cmd.create_rx_queue.queue_page_list_id =
htobe32((rx->com.qpl)->id);
} else {
cmd.create_rx_queue.queue_page_list_id =
htobe32(GVE_RAW_ADDRESSING_QPL_ID);
cmd.create_rx_queue.rx_desc_ring_addr =
htobe64(rx->dqo.compl_ring_mem.bus_addr);
cmd.create_rx_queue.rx_data_ring_addr =
htobe64(rx->desc_ring_mem.bus_addr);
cmd.create_rx_queue.rx_buff_ring_size =
htobe16(priv->rx_desc_cnt);
cmd.create_rx_queue.enable_rsc =
!!((if_getcapenable(priv->ifp) & IFCAP_LRO) &&
!gve_disable_hw_lro);
}
return (gve_adminq_execute_cmd(priv, &cmd));
}
@ -272,11 +332,21 @@ gve_adminq_create_tx_queue(struct gve_priv *priv, uint32_t queue_index)
.queue_id = htobe32(queue_index),
.queue_resources_addr = htobe64(qres_dma->bus_addr),
.tx_ring_addr = htobe64(tx->desc_ring_mem.bus_addr),
.queue_page_list_id = htobe32((tx->com.qpl)->id),
.ntfy_id = htobe32(tx->com.ntfy_id),
.tx_ring_size = htobe16(priv->tx_desc_cnt),
};
if (gve_is_gqi(priv)) {
cmd.create_tx_queue.queue_page_list_id =
htobe32((tx->com.qpl)->id);
} else {
cmd.create_tx_queue.queue_page_list_id =
htobe32(GVE_RAW_ADDRESSING_QPL_ID);
cmd.create_tx_queue.tx_comp_ring_addr =
htobe64(tx->dqo.compl_ring_mem.bus_addr);
cmd.create_tx_queue.tx_comp_ring_size =
htobe16(priv->tx_desc_cnt);
}
return (gve_adminq_execute_cmd(priv, &cmd));
}
@ -338,6 +408,8 @@ gve_adminq_describe_device(struct gve_priv *priv)
struct gve_device_descriptor *desc;
struct gve_dma_handle desc_mem;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
uint32_t supported_features_mask = 0;
int rc;
@ -366,12 +438,35 @@ gve_adminq_describe_device(struct gve_priv *priv)
bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD);
rc = gve_process_device_options(priv, desc, &dev_op_gqi_qpl,
rc = gve_process_device_options(priv, desc,
&dev_op_gqi_qpl,
&dev_op_dqo_rda,
&dev_op_dqo_qpl,
&dev_op_jumbo_frames);
if (rc != 0)
goto free_device_descriptor;
if (dev_op_gqi_qpl != NULL) {
if (dev_op_dqo_rda != NULL) {
snprintf(gve_queue_format, sizeof(gve_queue_format),
"%s", "DQO RDA");
priv->queue_format = GVE_DQO_RDA_FORMAT;
supported_features_mask = be32toh(
dev_op_dqo_rda->supported_features_mask);
if (bootverbose)
device_printf(priv->dev,
"Driver is running with DQO RDA queue format.\n");
} else if (dev_op_dqo_qpl != NULL) {
snprintf(gve_queue_format, sizeof(gve_queue_format),
"%s", "DQO QPL");
priv->queue_format = GVE_DQO_QPL_FORMAT;
supported_features_mask = be32toh(
dev_op_dqo_qpl->supported_features_mask);
if (bootverbose)
device_printf(priv->dev,
"Driver is running with DQO QPL queue format.\n");
} else if (dev_op_gqi_qpl != NULL) {
snprintf(gve_queue_format, sizeof(gve_queue_format),
"%s", "GQI QPL");
priv->queue_format = GVE_GQI_QPL_FORMAT;
supported_features_mask = be32toh(
dev_op_gqi_qpl->supported_features_mask);
@ -380,7 +475,7 @@ gve_adminq_describe_device(struct gve_priv *priv)
"Driver is running with GQI QPL queue format.\n");
} else {
device_printf(priv->dev, "No compatible queue formats\n");
rc = (EINVAL);
rc = EINVAL;
goto free_device_descriptor;
}
@ -506,6 +601,41 @@ gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
return (gve_adminq_execute_cmd(priv, &aq_cmd));
}
int
gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
struct gve_ptype_lut *ptype_lut_dqo)
{
struct gve_adminq_command aq_cmd = (struct gve_adminq_command){};
struct gve_ptype_map *ptype_map;
struct gve_dma_handle dma;
int err = 0;
int i;
err = gve_dma_alloc_coherent(priv, sizeof(*ptype_map), PAGE_SIZE, &dma);
if (err)
return (err);
ptype_map = dma.cpu_addr;
aq_cmd.opcode = htobe32(GVE_ADMINQ_GET_PTYPE_MAP);
aq_cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) {
.ptype_map_len = htobe64(sizeof(*ptype_map)),
.ptype_map_addr = htobe64(dma.bus_addr),
};
err = gve_adminq_execute_cmd(priv, &aq_cmd);
if (err)
goto err;
/* Populate ptype_lut_dqo. */
for (i = 0; i < GVE_NUM_PTYPES; i++) {
ptype_lut_dqo->ptypes[i].l3_type = ptype_map->ptypes[i].l3_type;
ptype_lut_dqo->ptypes[i].l4_type = ptype_map->ptypes[i].l4_type;
}
err:
gve_dma_free_coherent(&dma);
return (err);
}
int
gve_adminq_alloc(struct gve_priv *priv)
{
@ -543,6 +673,7 @@ gve_adminq_alloc(struct gve_priv *priv)
priv->adminq_destroy_rx_queue_cnt = 0;
priv->adminq_dcfg_device_resources_cnt = 0;
priv->adminq_set_driver_parameter_cnt = 0;
priv->adminq_get_ptype_map_cnt = 0;
gve_reg_bar_write_4(priv, GVE_REG_ADMINQ_ADDR,
priv->adminq_bus_addr / ADMINQ_SIZE);
@ -772,6 +903,10 @@ gve_adminq_issue_cmd(struct gve_priv *priv, struct gve_adminq_command *cmd_orig)
priv->adminq_verify_driver_compatibility_cnt++;
break;
case GVE_ADMINQ_GET_PTYPE_MAP:
priv->adminq_get_ptype_map_cnt++;
break;
default:
device_printf(priv->dev, "Unknown AQ command opcode %d\n", opcode);
}


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -137,9 +137,20 @@ _Static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4,
struct gve_device_option_dqo_rda {
__be32 supported_features_mask;
__be16 tx_comp_ring_entries;
__be16 rx_buff_ring_entries;
};
_Static_assert(sizeof(struct gve_device_option_dqo_rda) == 4,
_Static_assert(sizeof(struct gve_device_option_dqo_rda) == 8,
"gve: bad admin queue struct length");
struct gve_device_option_dqo_qpl {
__be32 supported_features_mask;
__be16 tx_comp_ring_entries;
__be16 rx_buff_ring_entries;
};
_Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8,
"gve: bad admin queue struct length");
struct gve_device_option_modify_ring {
@ -166,6 +177,7 @@ enum gve_dev_opt_id {
GVE_DEV_OPT_ID_GQI_QPL = 0x3,
GVE_DEV_OPT_ID_DQO_RDA = 0x4,
GVE_DEV_OPT_ID_MODIFY_RING = 0x6,
GVE_DEV_OPT_ID_DQO_QPL = 0x7,
GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};
@ -180,6 +192,7 @@ enum gve_dev_opt_req_feat_mask {
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
};
@ -194,9 +207,8 @@ enum gve_sup_feature_mask {
enum gve_driver_capability {
gve_driver_capability_gqi_qpl = 0,
gve_driver_capability_gqi_rda = 1,
gve_driver_capability_dqo_qpl = 2, /* reserved for future use */
gve_driver_capability_dqo_qpl = 2,
gve_driver_capability_dqo_rda = 3,
gve_driver_capability_alt_miss_compl = 4,
};
#define GVE_CAP1(a) BIT((int) a)
@ -209,7 +221,10 @@ enum gve_driver_capability {
* Only a few bits (as shown in `gve_driver_compatibility`) are currently
* defined. The rest are reserved for future use.
*/
#define GVE_DRIVER_CAPABILITY_FLAGS1 (GVE_CAP1(gve_driver_capability_gqi_qpl))
#define GVE_DRIVER_CAPABILITY_FLAGS1 \
(GVE_CAP1(gve_driver_capability_gqi_qpl) | \
GVE_CAP1(gve_driver_capability_dqo_qpl) | \
GVE_CAP1(gve_driver_capability_dqo_rda))
#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0
@ -282,6 +297,8 @@ struct gve_adminq_create_tx_queue {
_Static_assert(sizeof(struct gve_adminq_create_tx_queue) == 48,
"gve: bad admin queue struct length");
#define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF
struct gve_adminq_create_rx_queue {
__be32 queue_id;
__be32 index;
@ -352,6 +369,23 @@ struct stats {
_Static_assert(sizeof(struct stats) == 16,
"gve: bad admin queue struct length");
/* These are control path types for PTYPE which are the same as the data path
* types.
*/
struct gve_ptype_entry {
uint8_t l3_type;
uint8_t l4_type;
};
struct gve_ptype_map {
struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */
};
struct gve_adminq_get_ptype_map {
__be64 ptype_map_len;
__be64 ptype_map_addr;
};
struct gve_adminq_command {
__be32 opcode;
__be32 status;
@ -368,6 +402,7 @@ struct gve_adminq_command {
struct gve_adminq_set_driver_parameter set_driver_param;
struct gve_adminq_verify_driver_compatibility
verify_driver_compatibility;
struct gve_adminq_get_ptype_map get_ptype_map;
uint8_t reserved[56];
};
};
@ -375,6 +410,24 @@ struct gve_adminq_command {
_Static_assert(sizeof(struct gve_adminq_command) == 64,
"gve: bad admin queue struct length");
enum gve_l3_type {
/* Must be zero so zero initialized LUT is unknown. */
GVE_L3_TYPE_UNKNOWN = 0,
GVE_L3_TYPE_OTHER,
GVE_L3_TYPE_IPV4,
GVE_L3_TYPE_IPV6,
};
enum gve_l4_type {
/* Must be zero so zero initialized LUT is unknown. */
GVE_L4_TYPE_UNKNOWN = 0,
GVE_L4_TYPE_OTHER,
GVE_L4_TYPE_TCP,
GVE_L4_TYPE_UDP,
GVE_L4_TYPE_ICMP,
GVE_L4_TYPE_SCTP,
};
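As an illustrative aside, a DQO receive path might combine these L3/L4 types with the ptype LUT fetched by gve_adminq_get_ptype_map_dqo() to decide which mbuf checksum flags to set. A hedged sketch with a hypothetical helper, using the completion-descriptor fields defined in gve_dqo.h:

static void
gve_rx_set_csum_sketch(struct gve_priv *priv,
    const struct gve_rx_compl_desc_dqo *desc, struct mbuf *mbuf)
{
	const struct gve_ptype *ptype =
	    &priv->ptype_lut_dqo->ptypes[desc->packet_type];

	if (desc->rx_error || !desc->l3_l4_processed)
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4 && !desc->csum_ip_err)
		mbuf->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;

	if ((ptype->l4_type == GVE_L4_TYPE_TCP ||
	    ptype->l4_type == GVE_L4_TYPE_UDP) && !desc->csum_l4_err) {
		mbuf->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
	}
}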
int gve_adminq_create_rx_queues(struct gve_priv *priv, uint32_t num_queues);
int gve_adminq_create_tx_queues(struct gve_priv *priv, uint32_t num_queues);
int gve_adminq_destroy_tx_queues(struct gve_priv *priv, uint32_t num_queues);
@ -387,8 +440,10 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv);
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
void gve_release_adminq(struct gve_priv *priv);
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, uint32_t page_list_id);
int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
uint64_t driver_info_len, vm_paddr_t driver_info_addr);
uint64_t driver_info_len, vm_paddr_t driver_info_addr);
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
struct gve_ptype_lut *ptype_lut);
#endif /* _GVE_AQ_H_ */

sys/dev/gve/gve_dqo.h (new file, 321 lines)

@ -0,0 +1,321 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* GVE DQO Descriptor formats */
#ifndef _GVE_DESC_DQO_H_
#define _GVE_DESC_DQO_H_
#include "gve_plat.h"
#define GVE_ITR_ENABLE_BIT_DQO BIT(0)
#define GVE_ITR_NO_UPDATE_DQO (3 << 3)
#define GVE_ITR_INTERVAL_DQO_SHIFT 5
#define GVE_ITR_INTERVAL_DQO_MASK ((1 << 12) - 1)
#define GVE_TX_IRQ_RATELIMIT_US_DQO 50
#define GVE_RX_IRQ_RATELIMIT_US_DQO 20
#define GVE_TX_MAX_HDR_SIZE_DQO 255
#define GVE_TX_MIN_TSO_MSS_DQO 88
/*
* Ringing the doorbell too often can hurt performance.
*
* HW requires this value to be at least 8.
*/
#define GVE_RX_BUF_THRESH_DQO 32
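A hedged sketch of how this threshold might be honored when posting RX buffers; the bookkeeping names mirror the gve_rx_ring "dqo" fields in gve.h, and the batching policy shown is illustrative rather than the driver's exact logic:

static void
gve_rx_post_buf_sketch(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	/* Advance the buffer-queue head after writing a gve_rx_desc_dqo. */
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++;

	/* Ring the doorbell only once a batch of buffers has accumulated. */
	if ((rx->fill_cnt & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
		gve_db_bar_dqo_write_4(priv, rx->com.db_offset, rx->dqo.head);
}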
/*
* Start dropping RX fragments if at least this many
* buffers cannot be posted to the NIC.
*/
#define GVE_RX_DQO_MIN_PENDING_BUFS 128
#define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE)
/*
* gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total
* number of pages per QPL to 2048.
*/
#define GVE_RX_NUM_QPL_PAGES_DQO 2048
/* 2K TX buffers for DQO-QPL */
#define GVE_TX_BUF_SHIFT_DQO 11
#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
#define GVE_TX_NUM_QPL_PAGES_DQO 512
/* Basic TX descriptor (DTYPE 0x0C) */
struct gve_tx_pkt_desc_dqo {
__le64 buf_addr;
/* Must be GVE_TX_PKT_DESC_DTYPE_DQO (0xc) */
uint8_t dtype:5;
/* Denotes the last descriptor of a packet. */
uint8_t end_of_packet:1;
uint8_t checksum_offload_enable:1;
/* If set, will generate a descriptor completion for this descriptor. */
uint8_t report_event:1;
uint8_t reserved0;
__le16 reserved1;
/* The TX completion for this packet will contain this tag. */
__le16 compl_tag;
uint16_t buf_size:14;
uint16_t reserved2:2;
} __packed;
_Static_assert(sizeof(struct gve_tx_pkt_desc_dqo) == 16,
"gve: bad dqo desc struct length");
#define GVE_TX_PKT_DESC_DTYPE_DQO 0xc
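For illustration, a transmit path would typically emit one such descriptor per DMA segment, marking only the final segment with end_of_packet and tagging every descriptor with the packet's completion tag. A hedged sketch (hypothetical helper; ring bookkeeping names follow the gve_tx_ring "dqo" fields in gve.h):

static void
gve_tx_fill_pkt_desc_sketch(struct gve_tx_ring *tx,
    const bus_dma_segment_t *seg, bool last_seg, uint16_t compl_tag)
{
	struct gve_tx_pkt_desc_dqo *desc =
	    &tx->dqo.desc_ring[tx->dqo.desc_tail].pkt;

	*desc = (struct gve_tx_pkt_desc_dqo) {
		.buf_addr = htole64(seg->ds_addr),
		.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
		.end_of_packet = last_seg,	/* only on the final segment */
		.checksum_offload_enable = 1,
		.compl_tag = htole16(compl_tag),
		/* assumes segment length < GVE_TX_MAX_BUF_SIZE_DQO */
		.buf_size = (uint16_t)seg->ds_len,
	};
	tx->dqo.desc_tail = (tx->dqo.desc_tail + 1) & tx->dqo.desc_mask;
}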
/*
* Maximum number of data descriptors allowed per packet, or per-TSO segment.
*/
#define GVE_TX_MAX_DATA_DESCS_DQO 10
#define GVE_TX_MAX_BUF_SIZE_DQO ((16 * 1024) - 1)
#define GVE_TSO_MAXSIZE_DQO IP_MAXPACKET
_Static_assert(GVE_TX_MAX_BUF_SIZE_DQO * GVE_TX_MAX_DATA_DESCS_DQO >=
GVE_TSO_MAXSIZE_DQO,
"gve: bad tso parameters");
/*
 * The "report_event" bit may only be set on the last descriptor of a TX
 * packet, and descriptors carrying it must be spaced at least this many
 * descriptors apart.
 */
#define GVE_TX_MIN_RE_INTERVAL 32
struct gve_tx_context_cmd_dtype {
uint8_t dtype:5;
uint8_t tso:1;
uint8_t reserved1:2;
uint8_t reserved2;
};
_Static_assert(sizeof(struct gve_tx_context_cmd_dtype) == 2,
"gve: bad dqo desc struct length");
/*
* TX Native TSO Context DTYPE (0x05)
*
* "flex" fields allow the driver to send additional packet context to HW.
*/
struct gve_tx_tso_context_desc_dqo {
/* The L4 payload bytes that should be segmented. */
uint32_t tso_total_len:24;
uint32_t flex10:8;
/* Max segment size in TSO excluding headers. */
uint16_t mss:14;
uint16_t reserved:2;
uint8_t header_len; /* Header length to use for TSO offload */
uint8_t flex11;
struct gve_tx_context_cmd_dtype cmd_dtype;
uint8_t flex0;
uint8_t flex5;
uint8_t flex6;
uint8_t flex7;
uint8_t flex8;
uint8_t flex9;
} __packed;
_Static_assert(sizeof(struct gve_tx_tso_context_desc_dqo) == 16,
"gve: bad dqo desc struct length");
#define GVE_TX_TSO_CTX_DESC_DTYPE_DQO 0x5
/* General context descriptor for sending metadata. */
struct gve_tx_general_context_desc_dqo {
uint8_t flex4;
uint8_t flex5;
uint8_t flex6;
uint8_t flex7;
uint8_t flex8;
uint8_t flex9;
uint8_t flex10;
uint8_t flex11;
struct gve_tx_context_cmd_dtype cmd_dtype;
uint16_t reserved;
uint8_t flex0;
uint8_t flex1;
uint8_t flex2;
uint8_t flex3;
} __packed;
_Static_assert(sizeof(struct gve_tx_general_context_desc_dqo) == 16,
"gve: bad dqo desc struct length");
#define GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO 0x4
/*
* Logical structure of metadata which is packed into context descriptor flex
* fields.
*/
struct gve_tx_metadata_dqo {
union {
struct {
uint8_t version;
/*
* A zero value means no l4_hash was associated with the
* mbuf.
*/
uint16_t path_hash:15;
/*
* Should be set to 1 if the flow associated with the
* mbuf had a rehash from the TCP stack.
*/
uint16_t rehash_event:1;
} __packed;
uint8_t bytes[12];
};
} __packed;
_Static_assert(sizeof(struct gve_tx_metadata_dqo) == 12,
"gve: bad dqo desc struct length");
#define GVE_TX_METADATA_VERSION_DQO 0
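A hedged sketch of how these metadata fields might be populated from an mbuf's flow id before being packed into the general context descriptor's flex fields (hypothetical helper; the exact policy is up to the driver):

static void
gve_tx_fill_metadata_sketch(struct gve_tx_metadata_dqo *md,
    const struct mbuf *mbuf)
{
	*md = (struct gve_tx_metadata_dqo) {
		.version = GVE_TX_METADATA_VERSION_DQO,
	};

	/* A zero path_hash means "no l4 hash", so set it only when one exists. */
	if (M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE)
		md->path_hash = mbuf->m_pkthdr.flowid & 0x7fff; /* 15-bit field */
}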
/* TX completion descriptor */
struct gve_tx_compl_desc_dqo {
/* For types 0-4 this is the TX queue ID associated with this
* completion.
*/
uint16_t id:11;
/* See: GVE_COMPL_TYPE_DQO* */
uint16_t type:3;
uint16_t reserved0:1;
/* Flipped by HW to notify the descriptor is populated. */
uint16_t generation:1;
union {
/* For descriptor completions, this is the last index fetched
* by HW + 1.
*/
__le16 tx_head;
/* For packet completions, this is the completion tag set on the
* TX packet descriptors.
*/
__le16 completion_tag;
};
__le32 reserved1;
} __packed;
_Static_assert(sizeof(struct gve_tx_compl_desc_dqo) == 8,
"gve: bad dqo desc struct length");
union gve_tx_desc_dqo {
struct gve_tx_pkt_desc_dqo pkt;
struct gve_tx_tso_context_desc_dqo tso_ctx;
struct gve_tx_general_context_desc_dqo general_ctx;
};
#define GVE_COMPL_TYPE_DQO_PKT 0x2 /* Packet completion */
#define GVE_COMPL_TYPE_DQO_DESC 0x4 /* Descriptor completion */
/* Descriptor to post buffers to HW on buffer queue. */
struct gve_rx_desc_dqo {
__le16 buf_id; /* ID returned in Rx completion descriptor */
__le16 reserved0;
__le32 reserved1;
__le64 buf_addr; /* DMA address of the buffer */
__le64 header_buf_addr;
__le64 reserved2;
} __packed;
_Static_assert(sizeof(struct gve_rx_desc_dqo) == 32,
"gve: bad dqo desc struct length");
/* Descriptor for HW to notify SW of new packets received on RX queue. */
struct gve_rx_compl_desc_dqo {
/* Must be 1 */
uint8_t rxdid:4;
uint8_t reserved0:4;
/* Packet originated from this system rather than the network. */
uint8_t loopback:1;
/* Set when IPv6 packet contains a destination options header or routing
* header.
*/
uint8_t ipv6_ex_add:1;
/* Invalid packet was received. */
uint8_t rx_error:1;
uint8_t reserved1:5;
uint16_t packet_type:10;
uint16_t ip_hdr_err:1;
uint16_t udp_len_err:1;
uint16_t raw_cs_invalid:1;
uint16_t reserved2:3;
uint16_t packet_len:14;
/* Flipped by HW to notify the descriptor is populated. */
uint16_t generation:1;
/* Should be zero. */
uint16_t buffer_queue_id:1;
uint16_t header_len:10;
uint16_t rsc:1;
uint16_t split_header:1;
uint16_t reserved3:4;
uint8_t descriptor_done:1;
uint8_t end_of_packet:1;
uint8_t header_buffer_overflow:1;
uint8_t l3_l4_processed:1;
uint8_t csum_ip_err:1;
uint8_t csum_l4_err:1;
uint8_t csum_external_ip_err:1;
uint8_t csum_external_udp_err:1;
uint8_t status_error1;
__le16 reserved5;
__le16 buf_id; /* Buffer ID which was sent on the buffer queue. */
union {
/* Packet checksum. */
__le16 raw_cs;
/* Segment length for RSC packets. */
__le16 rsc_seg_len;
};
__le32 hash;
__le32 reserved6;
__le64 reserved7;
} __packed;
_Static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32,
"gve: bad dqo desc struct length");
#endif /* _GVE_DESC_DQO_H_ */


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -30,10 +30,11 @@
*/
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
#define GVE_DRIVER_VERSION "GVE-FBSD-1.0.1\n"
#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.1\n"
#define GVE_VERSION_MAJOR 1
#define GVE_VERSION_MINOR 0
#define GVE_VERSION_MINOR 3
#define GVE_VERSION_SUB 1
#define GVE_DEFAULT_RX_COPYBREAK 256
@ -124,9 +125,11 @@ gve_up(struct gve_priv *priv)
if (if_getcapenable(ifp) & IFCAP_TSO6)
if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
err = gve_register_qpls(priv);
if (err != 0)
goto reset;
if (gve_is_qpl(priv)) {
err = gve_register_qpls(priv);
if (err != 0)
goto reset;
}
err = gve_create_rx_rings(priv);
if (err != 0)
@ -174,10 +177,13 @@ gve_down(struct gve_priv *priv)
if (gve_destroy_tx_rings(priv) != 0)
goto reset;
if (gve_unregister_qpls(priv) != 0)
goto reset;
if (gve_is_qpl(priv)) {
if (gve_unregister_qpls(priv) != 0)
goto reset;
}
gve_mask_all_queue_irqs(priv);
if (gve_is_gqi(priv))
gve_mask_all_queue_irqs(priv);
gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
priv->interface_down_cnt++;
return;
@ -367,6 +373,18 @@ gve_setup_ifnet(device_t dev, struct gve_priv *priv)
if_settransmitfn(ifp, gve_xmit_ifp);
if_setqflushfn(ifp, gve_qflush);
/*
* Set TSO limits, must match the arguments to bus_dma_tag_create
* when creating tx->dqo.buf_dmatag. Only applies to the RDA mode
* because in QPL we copy the entire packet into the bounce buffer
* and thus it does not matter how fragmented the mbuf is.
*/
if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
}
if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);
#if __FreeBSD_version >= 1400086
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
#else
@ -449,7 +467,8 @@ gve_free_rings(struct gve_priv *priv)
gve_free_irqs(priv);
gve_free_tx_rings(priv);
gve_free_rx_rings(priv);
gve_free_qpls(priv);
if (gve_is_qpl(priv))
gve_free_qpls(priv);
}
static int
@ -457,9 +476,11 @@ gve_alloc_rings(struct gve_priv *priv)
{
int err;
err = gve_alloc_qpls(priv);
if (err != 0)
goto abort;
if (gve_is_qpl(priv)) {
err = gve_alloc_qpls(priv);
if (err != 0)
goto abort;
}
err = gve_alloc_rx_rings(priv);
if (err != 0)
@ -499,6 +520,11 @@ gve_deconfigure_resources(struct gve_priv *priv)
gve_free_irq_db_array(priv);
gve_free_counter_array(priv);
if (priv->ptype_lut_dqo) {
free(priv->ptype_lut_dqo, M_GVE);
priv->ptype_lut_dqo = NULL;
}
}
static int
@ -525,6 +551,18 @@ gve_configure_resources(struct gve_priv *priv)
goto abort;
}
if (!gve_is_gqi(priv)) {
priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE,
M_WAITOK | M_ZERO);
err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
if (err != 0) {
device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
err);
goto abort;
}
}
gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
if (bootverbose)
device_printf(priv->dev, "Configured device resources\n");
@ -742,6 +780,9 @@ gve_attach(device_t dev)
int rid;
int err;
snprintf(gve_version, sizeof(gve_version), "%d.%d.%d",
GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB);
priv = device_get_softc(dev);
priv->dev = dev;
GVE_IFACE_LOCK_INIT(priv->gve_iface_lock);


@ -85,6 +85,9 @@
typedef uint16_t __be16;
typedef uint32_t __be32;
typedef uint64_t __be64;
typedef uint16_t __le16;
typedef uint32_t __le32;
typedef uint64_t __le64;
#define BIT(nr) (1UL << (nr))
#define FBSD_VERSION_MAJOR (__FreeBSD_version / 100000)


@ -32,13 +32,14 @@
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
static MALLOC_DEFINE(M_GVE_QPL, "gve qpl", "gve qpl allocations");
static uint32_t
gve_num_tx_qpls(struct gve_priv *priv)
{
if (priv->queue_format != GVE_GQI_QPL_FORMAT)
if (!gve_is_qpl(priv))
return (0);
return (priv->tx_cfg.max_queues);
@ -47,7 +48,7 @@ gve_num_tx_qpls(struct gve_priv *priv)
static uint32_t
gve_num_rx_qpls(struct gve_priv *priv)
{
if (priv->queue_format != GVE_GQI_QPL_FORMAT)
if (!gve_is_qpl(priv))
return (0);
return (priv->rx_cfg.max_queues);
@ -189,6 +190,7 @@ gve_free_qpls(struct gve_priv *priv)
int gve_alloc_qpls(struct gve_priv *priv)
{
int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
int num_pages;
int err;
int i;
@ -198,15 +200,19 @@ int gve_alloc_qpls(struct gve_priv *priv)
priv->qpls = malloc(num_qpls * sizeof(*priv->qpls), M_GVE_QPL,
M_WAITOK | M_ZERO);
num_pages = gve_is_gqi(priv) ?
priv->tx_desc_cnt / GVE_QPL_DIVISOR :
GVE_TX_NUM_QPL_PAGES_DQO;
for (i = 0; i < gve_num_tx_qpls(priv); i++) {
err = gve_alloc_qpl(priv, i, priv->tx_desc_cnt / GVE_QPL_DIVISOR,
err = gve_alloc_qpl(priv, i, num_pages,
/*single_kva=*/true);
if (err != 0)
goto abort;
}
num_pages = gve_is_gqi(priv) ? priv->rx_desc_cnt : GVE_RX_NUM_QPL_PAGES_DQO;
for (; i < num_qpls; i++) {
err = gve_alloc_qpl(priv, i, priv->rx_desc_cnt, /*single_kva=*/false);
err = gve_alloc_qpl(priv, i, num_pages, /*single_kva=*/false);
if (err != 0)
goto abort;
}
@ -283,3 +289,21 @@ gve_unregister_qpls(struct gve_priv *priv)
gve_clear_state_flag(priv, GVE_STATE_FLAG_QPLREG_OK);
return (0);
}
void
gve_mextadd_free(struct mbuf *mbuf)
{
vm_page_t page = (vm_page_t)mbuf->m_ext.ext_arg1;
vm_offset_t va = (vm_offset_t)mbuf->m_ext.ext_arg2;
/*
* Free the page only if this is the last ref.
* The interface might no longer exist by the time
* this callback is called, see gve_free_qpl.
*/
if (__predict_false(vm_page_unwire_noq(page))) {
pmap_qremove(va, 1);
kva_free(va, PAGE_SIZE);
vm_page_free(page);
}
}
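For context, a sketch of how such an external-buffer free callback is typically attached when handing a QPL page fragment to the network stack; the extra wire taken here is what gve_mextadd_free() later releases. Everything except gve_mextadd_free() and the m_extadd(9) API is illustrative:

static void
gve_attach_page_to_mbuf_sketch(struct mbuf *mbuf, vm_page_t page,
    vm_offset_t va, int offset, int len)
{
	/* Extra reference so the page outlives the driver's own QPL mapping. */
	vm_page_wire(page);

	m_extadd(mbuf, (char *)va + offset, len, gve_mextadd_free,
	    page, (void *)va, 0, EXT_NET_DRV);
	mbuf->m_len = len;
}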


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -30,15 +30,12 @@
*/
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
static void
gve_rx_free_ring(struct gve_priv *priv, int i)
gve_rx_free_ring_gqi(struct gve_priv *priv, int i)
{
struct gve_rx_ring *rx = &priv->rx[i];
struct gve_ring_com *com = &rx->com;
/* Safe to call even if never allocated */
gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);
if (rx->page_info != NULL) {
free(rx->page_info, M_GVE);
@ -54,6 +51,21 @@ gve_rx_free_ring(struct gve_priv *priv, int i)
gve_dma_free_coherent(&rx->desc_ring_mem);
rx->desc_ring = NULL;
}
}
static void
gve_rx_free_ring(struct gve_priv *priv, int i)
{
struct gve_rx_ring *rx = &priv->rx[i];
struct gve_ring_com *com = &rx->com;
/* Safe to call even if never allocated */
gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);
if (gve_is_gqi(priv))
gve_rx_free_ring_gqi(priv, i);
else
gve_rx_free_ring_dqo(priv, i);
if (com->q_resources != NULL) {
gve_dma_free_coherent(&com->q_resources_mem);
@ -82,6 +94,52 @@ gve_prefill_rx_slots(struct gve_rx_ring *rx)
BUS_DMASYNC_PREWRITE);
}
static int
gve_rx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
struct gve_rx_ring *rx = &priv->rx[i];
struct gve_ring_com *com = &rx->com;
int err;
err = gve_dma_alloc_coherent(priv,
sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
CACHE_LINE_SIZE, &rx->desc_ring_mem);
if (err != 0) {
device_printf(priv->dev,
"Failed to alloc desc ring for rx ring %d", i);
goto abort;
}
rx->mask = priv->rx_pages_per_qpl - 1;
rx->desc_ring = rx->desc_ring_mem.cpu_addr;
com->qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
if (com->qpl == NULL) {
device_printf(priv->dev, "No QPL left for rx ring %d", i);
return (ENOMEM);
}
rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info),
M_GVE, M_WAITOK | M_ZERO);
err = gve_dma_alloc_coherent(priv,
sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
CACHE_LINE_SIZE, &rx->data_ring_mem);
if (err != 0) {
device_printf(priv->dev,
"Failed to alloc data ring for rx ring %d", i);
goto abort;
}
rx->data_ring = rx->data_ring_mem.cpu_addr;
gve_prefill_rx_slots(rx);
return (0);
abort:
gve_rx_free_ring_gqi(priv, i);
return (err);
}
static int
gve_rx_alloc_ring(struct gve_priv *priv, int i)
{
@ -92,46 +150,24 @@ gve_rx_alloc_ring(struct gve_priv *priv, int i)
com->priv = priv;
com->id = i;
rx->mask = priv->rx_pages_per_qpl - 1;
com->qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
if (com->qpl == NULL) {
device_printf(priv->dev, "No QPL left for rx ring %d", i);
return (ENOMEM);
}
rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info), M_GVE,
M_WAITOK | M_ZERO);
gve_alloc_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);
err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
PAGE_SIZE, &com->q_resources_mem);
if (err != 0) {
device_printf(priv->dev, "Failed to alloc queue resources for rx ring %d", i);
device_printf(priv->dev,
"Failed to alloc queue resources for rx ring %d", i);
goto abort;
}
com->q_resources = com->q_resources_mem.cpu_addr;
err = gve_dma_alloc_coherent(priv,
sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
CACHE_LINE_SIZE, &rx->desc_ring_mem);
if (err != 0) {
device_printf(priv->dev, "Failed to alloc desc ring for rx ring %d", i);
if (gve_is_gqi(priv))
err = gve_rx_alloc_ring_gqi(priv, i);
else
err = gve_rx_alloc_ring_dqo(priv, i);
if (err != 0)
goto abort;
}
rx->desc_ring = rx->desc_ring_mem.cpu_addr;
err = gve_dma_alloc_coherent(priv,
sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
CACHE_LINE_SIZE, &rx->data_ring_mem);
if (err != 0) {
device_printf(priv->dev, "Failed to alloc data ring for rx ring %d", i);
goto abort;
}
rx->data_ring = rx->data_ring_mem.cpu_addr;
gve_prefill_rx_slots(rx);
return (0);
abort:
@ -217,6 +253,11 @@ gve_clear_rx_ring(struct gve_priv *priv, int i)
{
struct gve_rx_ring *rx = &priv->rx[i];
if (!gve_is_gqi(priv)) {
gve_clear_rx_ring_dqo(priv, i);
return;
}
rx->seq_no = 1;
rx->cnt = 0;
rx->fill_cnt = 0;
@ -238,14 +279,21 @@ gve_start_rx_ring(struct gve_priv *priv, int i)
rx->lro.ifp = priv->ifp;
}
NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
if (gve_is_gqi(priv))
NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
else
NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq_dqo, rx);
com->cleanup_tq = taskqueue_create_fast("gve rx", M_WAITOK,
taskqueue_thread_enqueue, &com->cleanup_tq);
taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET,
"%s rxq %d", device_get_nameunit(priv->dev), i);
gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
if (gve_is_gqi(priv)) {
/* GQ RX bufs are prefilled at ring alloc time */
gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
} else
gve_rx_prefill_buffers_dqo(rx);
}
int
@ -361,24 +409,6 @@ gve_set_rss_type(__be16 flag, struct mbuf *mbuf)
}
}
static void
gve_mextadd_free(struct mbuf *mbuf)
{
vm_page_t page = (vm_page_t)mbuf->m_ext.ext_arg1;
vm_offset_t va = (vm_offset_t)mbuf->m_ext.ext_arg2;
/*
* Free the page only if this is the last ref.
* The interface might no longer exist by the time
* this callback is called, see gve_free_qpl.
*/
if (__predict_false(vm_page_unwire_noq(page))) {
pmap_qremove(va, 1);
kva_free(va, PAGE_SIZE);
vm_page_free(page);
}
}
static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{

sys/dev/gve/gve_rx_dqo.c (new file, 1012 lines): diff suppressed because it is too large.


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -30,6 +30,21 @@
*/
#include "gve.h"
static SYSCTL_NODE(_hw, OID_AUTO, gve, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"GVE driver parameters");
bool gve_disable_hw_lro = false;
SYSCTL_BOOL(_hw_gve, OID_AUTO, disable_hw_lro, CTLFLAG_RDTUN,
&gve_disable_hw_lro, 0, "Controls if hardware LRO is used");
char gve_queue_format[8];
SYSCTL_STRING(_hw_gve, OID_AUTO, queue_format, CTLFLAG_RD,
&gve_queue_format, 0, "Queue format being used by the iface");
char gve_version[8];
SYSCTL_STRING(_hw_gve, OID_AUTO, driver_version, CTLFLAG_RD,
&gve_version, 0, "Driver version");
static void
gve_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *child, struct gve_rx_ring *rxq)
@ -68,10 +83,22 @@ gve_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
"rx_dropped_pkt_desc_err", CTLFLAG_RD,
&stats->rx_dropped_pkt_desc_err,
"Packets dropped due to descriptor error");
SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO,
"rx_dropped_pkt_buf_post_fail", CTLFLAG_RD,
&stats->rx_dropped_pkt_buf_post_fail,
"Packets dropped due to failure to post enough buffers");
SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO,
"rx_dropped_pkt_mbuf_alloc_fail", CTLFLAG_RD,
&stats->rx_dropped_pkt_mbuf_alloc_fail,
"Packets dropped due to failed mbuf allocation");
SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO,
"rx_mbuf_dmamap_err", CTLFLAG_RD,
&stats->rx_mbuf_dmamap_err,
"Number of rx mbufs which couldn't be DMA mapped");
SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO,
"rx_mbuf_mclget_null", CTLFLAG_RD,
&stats->rx_mbuf_mclget_null,
"Number of times when there were no cluster mbufs");
SYSCTL_ADD_U32(ctx, list, OID_AUTO,
"rx_completed_desc", CTLFLAG_RD,
&rxq->cnt, 0, "Number of descriptors completed");
@ -113,9 +140,9 @@ gve_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
"tx_bytes", CTLFLAG_RD,
&stats->tbytes, "Bytes transmitted");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_dropped_pkt_nospace_device", CTLFLAG_RD,
&stats->tx_dropped_pkt_nospace_device,
"Packets dropped due to no space in device");
"tx_delayed_pkt_nospace_device", CTLFLAG_RD,
&stats->tx_delayed_pkt_nospace_device,
"Packets delayed due to no space in device");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_dropped_pkt_nospace_bufring", CTLFLAG_RD,
&stats->tx_dropped_pkt_nospace_bufring,
@ -124,6 +151,42 @@ gve_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
"tx_dropped_pkt_vlan", CTLFLAG_RD,
&stats->tx_dropped_pkt_vlan,
"Dropped VLAN packets");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_delayed_pkt_nospace_descring", CTLFLAG_RD,
&stats->tx_delayed_pkt_nospace_descring,
"Packets delayed due to no space in desc ring");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_delayed_pkt_nospace_compring", CTLFLAG_RD,
&stats->tx_delayed_pkt_nospace_compring,
"Packets delayed due to no space in comp ring");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_delayed_pkt_nospace_qpl_bufs", CTLFLAG_RD,
&stats->tx_delayed_pkt_nospace_qpl_bufs,
"Packets delayed due to not enough qpl bufs");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_delayed_pkt_tsoerr", CTLFLAG_RD,
&stats->tx_delayed_pkt_tsoerr,
"TSO packets delayed due to errors during TSO prep");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_mbuf_collapse", CTLFLAG_RD,
&stats->tx_mbuf_collapse,
"tx mbufs that had to be collapsed");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_mbuf_defrag", CTLFLAG_RD,
&stats->tx_mbuf_defrag,
"tx mbufs that had to be defragged");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_mbuf_defrag_err", CTLFLAG_RD,
&stats->tx_mbuf_defrag_err,
"tx mbufs that failed defrag");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_mbuf_dmamap_enomem_err", CTLFLAG_RD,
&stats->tx_mbuf_dmamap_enomem_err,
"tx mbufs that could not be dma-mapped due to low mem");
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
"tx_mbuf_dmamap_err", CTLFLAG_RD,
&stats->tx_mbuf_dmamap_err,
"tx mbufs that could not be dma-mapped");
}
static void
@ -185,6 +248,9 @@ gve_setup_adminq_stat_sysctl(struct sysctl_ctx_list *ctx,
SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_destroy_rx_queue_cnt",
CTLFLAG_RD, &priv->adminq_destroy_rx_queue_cnt, 0,
"adminq_destroy_rx_queue_cnt");
SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_get_ptype_map_cnt",
CTLFLAG_RD, &priv->adminq_get_ptype_map_cnt, 0,
"adminq_get_ptype_map_cnt");
SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
"adminq_dcfg_device_resources_cnt", CTLFLAG_RD,
&priv->adminq_dcfg_device_resources_cnt, 0,


@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -30,6 +30,7 @@
*/
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
@ -47,6 +48,22 @@ gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_ring *tx)
return (0);
}
static void
gve_tx_free_ring_gqi(struct gve_priv *priv, int i)
{
struct gve_tx_ring *tx = &priv->tx[i];
if (tx->desc_ring != NULL) {
gve_dma_free_coherent(&tx->desc_ring_mem);
tx->desc_ring = NULL;
}
if (tx->info != NULL) {
free(tx->info, M_GVE);
tx->info = NULL;
}
}
static void
gve_tx_free_ring(struct gve_priv *priv, int i)
{
@ -56,28 +73,61 @@ gve_tx_free_ring(struct gve_priv *priv, int i)
/* Safe to call even if never alloced */
gve_free_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);
if (tx->br != NULL) {
buf_ring_free(tx->br, M_DEVBUF);
tx->br = NULL;
}
if (mtx_initialized(&tx->ring_mtx))
mtx_destroy(&tx->ring_mtx);
if (tx->info != NULL) {
free(tx->info, M_GVE);
tx->info = NULL;
}
if (tx->desc_ring != NULL) {
gve_dma_free_coherent(&tx->desc_ring_mem);
tx->desc_ring = NULL;
}
if (com->q_resources != NULL) {
gve_dma_free_coherent(&com->q_resources_mem);
com->q_resources = NULL;
}
if (tx->br != NULL) {
buf_ring_free(tx->br, M_DEVBUF);
tx->br = NULL;
}
if (gve_is_gqi(priv))
gve_tx_free_ring_gqi(priv, i);
else
gve_tx_free_ring_dqo(priv, i);
}
static int
gve_tx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
struct gve_tx_ring *tx = &priv->tx[i];
struct gve_ring_com *com = &tx->com;
int err;
err = gve_dma_alloc_coherent(priv,
sizeof(union gve_tx_desc) * priv->tx_desc_cnt,
CACHE_LINE_SIZE, &tx->desc_ring_mem);
if (err != 0) {
device_printf(priv->dev,
"Failed to alloc desc ring for tx ring %d", i);
goto abort;
}
tx->desc_ring = tx->desc_ring_mem.cpu_addr;
com->qpl = &priv->qpls[i];
if (com->qpl == NULL) {
device_printf(priv->dev, "No QPL left for tx ring %d\n", i);
err = ENOMEM;
goto abort;
}
err = gve_tx_fifo_init(priv, tx);
if (err != 0)
goto abort;
tx->info = malloc(
sizeof(struct gve_tx_buffer_state) * priv->tx_desc_cnt,
M_GVE, M_WAITOK | M_ZERO);
return (0);
abort:
gve_tx_free_ring_gqi(priv, i);
return (err);
}
static int
@ -91,19 +141,13 @@ gve_tx_alloc_ring(struct gve_priv *priv, int i)
com->priv = priv;
com->id = i;
com->qpl = &priv->qpls[i];
if (com->qpl == NULL) {
device_printf(priv->dev, "No QPL left for tx ring %d\n", i);
return (ENOMEM);
}
err = gve_tx_fifo_init(priv, tx);
if (gve_is_gqi(priv))
err = gve_tx_alloc_ring_gqi(priv, i);
else
err = gve_tx_alloc_ring_dqo(priv, i);
if (err != 0)
goto abort;
tx->info = malloc(sizeof(struct gve_tx_buffer_state) * priv->tx_desc_cnt,
M_GVE, M_WAITOK | M_ZERO);
sprintf(mtx_name, "gvetx%d", i);
mtx_init(&tx->ring_mtx, mtx_name, NULL, MTX_DEF);
@ -115,20 +159,12 @@ gve_tx_alloc_ring(struct gve_priv *priv, int i)
err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
PAGE_SIZE, &com->q_resources_mem);
if (err != 0) {
device_printf(priv->dev, "Failed to alloc queue resources for tx ring %d", i);
device_printf(priv->dev,
"Failed to alloc queue resources for tx ring %d", i);
goto abort;
}
com->q_resources = com->q_resources_mem.cpu_addr;
err = gve_dma_alloc_coherent(priv,
sizeof(union gve_tx_desc) * priv->tx_desc_cnt,
CACHE_LINE_SIZE, &tx->desc_ring_mem);
if (err != 0) {
device_printf(priv->dev, "Failed to alloc desc ring for tx ring %d", i);
goto abort;
}
tx->desc_ring = tx->desc_ring_mem.cpu_addr;
return (0);
abort:
@ -204,12 +240,15 @@ gve_clear_tx_ring(struct gve_priv *priv, int i)
}
static void
gve_start_tx_ring(struct gve_priv *priv, int i)
gve_start_tx_ring(struct gve_priv *priv, int i,
void (cleanup) (void *arg, int pending))
{
struct gve_tx_ring *tx = &priv->tx[i];
struct gve_ring_com *com = &tx->com;
NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq, tx);
atomic_store_bool(&tx->stopped, false);
NET_TASK_INIT(&com->cleanup_task, 0, cleanup, tx);
com->cleanup_tq = taskqueue_create_fast("gve tx", M_WAITOK,
taskqueue_thread_enqueue, &com->cleanup_tq);
taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET, "%s txq %d",
@ -233,8 +272,12 @@ gve_create_tx_rings(struct gve_priv *priv)
if (gve_get_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK))
return (0);
for (i = 0; i < priv->tx_cfg.num_queues; i++)
gve_clear_tx_ring(priv, i);
for (i = 0; i < priv->tx_cfg.num_queues; i++) {
if (gve_is_gqi(priv))
gve_clear_tx_ring(priv, i);
else
gve_clear_tx_ring_dqo(priv, i);
}
err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
if (err != 0)
@ -254,7 +297,10 @@ gve_create_tx_rings(struct gve_priv *priv)
com->db_offset = 4 * be32toh(com->q_resources->db_index);
com->counter_idx = be32toh(com->q_resources->counter_index);
gve_start_tx_ring(priv, i);
if (gve_is_gqi(priv))
gve_start_tx_ring(priv, i, gve_tx_cleanup_tq);
else
gve_start_tx_ring(priv, i, gve_tx_cleanup_tq_dqo);
}
gve_set_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
@ -383,6 +429,11 @@ gve_tx_cleanup_tq(void *arg, int pending)
gve_db_bar_write_4(priv, tx->com.irq_db_offset, GVE_IRQ_MASK);
taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
}
if (atomic_load_bool(&tx->stopped) && space_freed) {
atomic_store_bool(&tx->stopped, false);
taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
}
}
static void
@ -627,8 +678,7 @@ gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
bytes_required = gve_fifo_bytes_required(tx, first_seg_len, pkt_len);
if (__predict_false(!gve_can_tx(tx, bytes_required))) {
counter_enter();
counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_device, 1);
counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
counter_u64_add_protected(tx->stats.tx_delayed_pkt_nospace_device, 1);
counter_exit();
return (ENOBUFS);
}
@ -689,19 +739,86 @@ gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
return (0);
}
static int
gve_xmit_mbuf(struct gve_tx_ring *tx,
struct mbuf **mbuf)
{
if (gve_is_gqi(tx->com.priv))
return (gve_xmit(tx, *mbuf));
if (gve_is_qpl(tx->com.priv))
return (gve_xmit_dqo_qpl(tx, *mbuf));
/*
* gve_xmit_dqo might attempt to defrag the mbuf chain.
* The reference is passed in so that in the case of
* errors, the new mbuf chain is what's put back on the br.
*/
return (gve_xmit_dqo(tx, mbuf));
}
/*
* Has the side-effect of stopping the xmit queue by setting tx->stopped
*/
static int
gve_xmit_retry_enobuf_mbuf(struct gve_tx_ring *tx,
struct mbuf **mbuf)
{
int err;
atomic_store_bool(&tx->stopped, true);
/*
* Room made in the queue BEFORE the barrier will be seen by the
* gve_xmit_mbuf retry below.
*
* If room is made in the queue AFTER the barrier, the cleanup tq
* iteration creating the room will either see a tx->stopped value
* of 0 or the 1 we just wrote:
*
* If it sees a 1, then it would enqueue the xmit tq. Enqueue
* implies a retry on the waiting pkt.
*
* If it sees a 0, then that implies a previous iteration overwrote
* our 1, and that iteration would enqueue the xmit tq. Enqueue
* implies a retry on the waiting pkt.
*/
atomic_thread_fence_seq_cst();
err = gve_xmit_mbuf(tx, mbuf);
if (err == 0)
atomic_store_bool(&tx->stopped, false);
return (err);
}
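
The ordering argument above is the standard guard against a lost wakeup between a sender that parks itself and a cleanup path that frees space. The following standalone sketch is not the driver's code; the queue model and names are invented to show the same handshake in isolation: the sender publishes stopped before re-checking for room, and the cleanup side frees room before checking stopped, so whichever runs second is guaranteed to observe the other's update.

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative model only: "space" stands in for free descriptor slots. */
atomic_int space;
atomic_bool stopped;

/* Sender side: out of room, mark ourselves stopped, then re-check. */
bool
sender_enqueue_or_stop(void)
{
	if (atomic_fetch_sub(&space, 1) > 0)
		return (true);		/* got a slot */
	atomic_fetch_add(&space, 1);	/* undo the reservation */

	atomic_store(&stopped, true);
	atomic_thread_fence(memory_order_seq_cst);

	/* Retry: any space freed before the fence is visible here. */
	if (atomic_fetch_sub(&space, 1) > 0) {
		atomic_store(&stopped, false);
		return (true);
	}
	atomic_fetch_add(&space, 1);
	return (false);		/* stay stopped; cleanup will wake us */
}

/* Cleanup side: free space first, then wake the sender if it stopped. */
bool
cleanup_free_space(void)
{
	atomic_fetch_add(&space, 1);
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load(&stopped)) {
		atomic_store(&stopped, false);
		return (true);	/* caller would re-enqueue the xmit task */
	}
	return (false);
}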
static void
gve_xmit_br(struct gve_tx_ring *tx)
{
struct gve_priv *priv = tx->com.priv;
struct ifnet *ifp = priv->ifp;
struct mbuf *mbuf;
int err;
while ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0 &&
(mbuf = drbr_peek(ifp, tx->br)) != NULL) {
err = gve_xmit_mbuf(tx, &mbuf);
if (__predict_false(gve_xmit(tx, mbuf) != 0)) {
drbr_putback(ifp, tx->br, mbuf);
taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
/*
* We need to stop this taskqueue when we can't xmit the pkt due
* to lack of space in the NIC ring (ENOBUFS). The retry exists
* to guard against a TOCTTOU bug that could end up freezing the
* queue forever.
*/
if (__predict_false(mbuf != NULL && err == ENOBUFS))
err = gve_xmit_retry_enobuf_mbuf(tx, &mbuf);
if (__predict_false(err != 0 && mbuf != NULL)) {
if (err == EINVAL) {
drbr_advance(ifp, tx->br);
m_freem(mbuf);
} else
drbr_putback(ifp, tx->br, mbuf);
break;
}
@ -710,7 +827,12 @@ gve_xmit_br(struct gve_tx_ring *tx)
bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
BUS_DMASYNC_PREWRITE);
gve_db_bar_write_4(priv, tx->com.db_offset, tx->req);
if (gve_is_gqi(priv))
gve_db_bar_write_4(priv, tx->com.db_offset, tx->req);
else
gve_db_bar_dqo_write_4(priv, tx->com.db_offset,
tx->dqo.desc_tail);
}
}
@ -763,7 +885,8 @@ gve_xmit_ifp(if_t ifp, struct mbuf *mbuf)
is_br_empty = drbr_empty(ifp, tx->br);
err = drbr_enqueue(ifp, tx->br, mbuf);
if (__predict_false(err != 0)) {
taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
if (!atomic_load_bool(&tx->stopped))
taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
counter_enter();
counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_bufring, 1);
counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
@ -778,9 +901,8 @@ gve_xmit_ifp(if_t ifp, struct mbuf *mbuf)
if (is_br_empty && (GVE_RING_TRYLOCK(tx) != 0)) {
gve_xmit_br(tx);
GVE_RING_UNLOCK(tx);
} else {
} else if (!atomic_load_bool(&tx->stopped))
taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
}
return (0);
}

sys/dev/gve/gve_tx_dqo.c (new file, 1090 lines): diff suppressed because it is too large.

View File

@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2023 Google LLC
* Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@ -29,6 +29,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "gve.h"
#include "gve_dqo.h"
uint32_t
gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset)
@ -48,6 +49,12 @@ gve_db_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val)
bus_write_4(priv->db_bar, offset, htobe32(val));
}
void
gve_db_bar_dqo_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val)
{
bus_write_4(priv->db_bar, offset, val);
}
void
gve_alloc_counters(counter_u64_t *stat, int num_stats)
{
@ -307,7 +314,8 @@ gve_alloc_irqs(struct gve_priv *priv)
}
err = bus_setup_intr(priv->dev, irq->res, INTR_TYPE_NET | INTR_MPSAFE,
gve_tx_intr, NULL, &priv->tx[i], &irq->cookie);
gve_is_gqi(priv) ? gve_tx_intr : gve_tx_intr_dqo, NULL,
&priv->tx[i], &irq->cookie);
if (err != 0) {
device_printf(priv->dev, "Failed to setup irq %d for Tx queue %d, "
"err: %d\n", rid, i, err);
@ -334,7 +342,8 @@ gve_alloc_irqs(struct gve_priv *priv)
}
err = bus_setup_intr(priv->dev, irq->res, INTR_TYPE_NET | INTR_MPSAFE,
gve_rx_intr, NULL, &priv->rx[j], &irq->cookie);
gve_is_gqi(priv) ? gve_rx_intr : gve_rx_intr_dqo, NULL,
&priv->rx[j], &irq->cookie);
if (err != 0) {
device_printf(priv->dev, "Failed to setup irq %d for Rx queue %d, "
"err: %d\n", rid, j, err);
@ -374,6 +383,24 @@ abort:
return (err);
}
/*
* Builds register value to write to DQO IRQ doorbell to enable with specified
* ITR interval.
*/
static uint32_t
gve_setup_itr_interval_dqo(uint32_t interval_us)
{
uint32_t result = GVE_ITR_ENABLE_BIT_DQO;
/* Interval has 2us granularity. */
interval_us >>= 1;
interval_us &= GVE_ITR_INTERVAL_DQO_MASK;
result |= (interval_us << GVE_ITR_INTERVAL_DQO_SHIFT);
return (result);
}
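
As a worked example of the arithmetic above, with macro values that are assumptions chosen only to make the numbers concrete (the real definitions live in the driver headers): a 64 us interval becomes 32 units of 2 us, which is then masked and shifted into the doorbell word alongside the enable bit.

#include <stdint.h>
#include <stdio.h>

/* Assumed values for illustration only; not the driver's definitions. */
#define ITR_ENABLE_BIT		(1u << 0)
#define ITR_INTERVAL_SHIFT	2
#define ITR_INTERVAL_MASK	0x3fffu

static uint32_t
itr_interval(uint32_t interval_us)
{
	interval_us >>= 1;			/* 2 us granularity */
	interval_us &= ITR_INTERVAL_MASK;
	return (ITR_ENABLE_BIT | (interval_us << ITR_INTERVAL_SHIFT));
}

int
main(void)
{
	/* 64 us -> 32 units of 2 us -> 32 << 2 = 0x80, OR'd with the enable bit. */
	printf("0x%x\n", itr_interval(64));	/* prints 0x81 */
	return (0);
}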
void
gve_unmask_all_queue_irqs(struct gve_priv *priv)
{
@ -383,11 +410,20 @@ gve_unmask_all_queue_irqs(struct gve_priv *priv)
for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
tx = &priv->tx[idx];
gve_db_bar_write_4(priv, tx->com.irq_db_offset, 0);
if (gve_is_gqi(priv))
gve_db_bar_write_4(priv, tx->com.irq_db_offset, 0);
else
gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
gve_setup_itr_interval_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO));
}
for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
rx = &priv->rx[idx];
gve_db_bar_write_4(priv, rx->com.irq_db_offset, 0);
if (gve_is_gqi(priv))
gve_db_bar_write_4(priv, rx->com.irq_db_offset, 0);
else
gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
gve_setup_itr_interval_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO));
}
}

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
#
# Copyright (c) 2023 Google LLC
# Copyright (c) 2023-2024 Google LLC
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
@ -30,7 +30,15 @@
.PATH: ${SRCTOP}/sys/dev/gve
KMOD= if_gve
SRCS= gve_main.c gve_adminq.c gve_utils.c gve_qpl.c gve_rx.c gve_tx.c gve_sysctl.c
SRCS= gve_main.c \
gve_adminq.c \
gve_utils.c \
gve_qpl.c \
gve_rx.c \
gve_rx_dqo.c \
gve_tx.c \
gve_tx_dqo.c \
gve_sysctl.c
SRCS+= device_if.h bus_if.h pci_if.h
.include <bsd.kmod.mk>

View File

@ -1613,7 +1613,7 @@ pf_handle_get_addr(struct nlmsghdr *hdr, struct nl_pstate *npt)
return (ENOMEM);
ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
ghdr_new->cmd = PFNL_CMD_GET_ADDRS;
ghdr_new->cmd = PFNL_CMD_GET_ADDR;
ghdr_new->version = 0;
ghdr_new->reserved = 0;

View File

@ -87,7 +87,7 @@ enum {
};
#define HVM_CALLBACK_VECTOR(vector) \
(((uint64_t)HVM_CB_TYPE_VECTOR << HVM_CB_TYPE_SHIFT) \
| (((vector) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT))
| (((vector) & HVM_CB_VECTOR_VECTOR_MASK) << HVM_CB_VECTOR_VECTOR_SHIFT))
enum xen_hvm_init_type {
XEN_HVM_INIT_EARLY,

View File

@ -33,6 +33,7 @@ logging.getLogger("scapy").setLevel(logging.CRITICAL)
import math
import scapy.all as sp
import sys
import socket
from copy import copy
from sniffer import Sniffer
@ -227,10 +228,12 @@ def check_ipv6(expect_params, packet):
if not ip6:
LOGGER.debug('Packet is not IPv6!')
return False
if src_address and ip6.src != src_address:
if src_address and socket.inet_pton(socket.AF_INET6, ip6.src) != \
socket.inet_pton(socket.AF_INET6, src_address):
LOGGER.debug(f'Wrong IPv6 source {ip6.src}, expected {src_address}')
return False
if dst_address and ip6.dst != dst_address:
if dst_address and socket.inet_pton(socket.AF_INET6, ip6.dst) != \
socket.inet_pton(socket.AF_INET6, dst_address):
LOGGER.debug(f'Wrong IPv6 destination {ip6.dst}, expected {dst_address}')
return False
# IPv6 has no IP-level checksum.
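
The move to inet_pton() here matters because the same IPv6 address can have several textual spellings (zero compression, leading zeroes), so a string comparison can report a false mismatch. A small standalone C sketch of the same idea, comparing binary forms instead of strings:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct in6_addr a, b;

	/* Two spellings of the same address. */
	if (inet_pton(AF_INET6, "2001:db8::1", &a) != 1 ||
	    inet_pton(AF_INET6, "2001:0db8:0:0:0:0:0:1", &b) != 1)
		return (1);

	/*
	 * A string comparison would report a mismatch here, but the
	 * binary representations are identical.
	 */
	printf("%s\n", memcmp(&a, &b, sizeof(a)) == 0 ? "equal" : "different");
	return (0);
}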

View File

@ -52,7 +52,19 @@ source_track_body()
"pass out keep state (source-track)"
ping -c 3 192.0.2.1
jexec alcatraz pfctl -s all -v
atf_check -s exit:0 -o match:'192.0.2.2 -> 0.0.0.0 \( states 1,.*' \
jexec alcatraz pfctl -sS
# Flush all source nodes
jexec alcatraz pfctl -FS
# We can't find the previous source node any more
atf_check -s exit:0 -o not-match:'192.0.2.2 -> 0.0.0.0 \( states 1,.*' \
jexec alcatraz pfctl -sS
# But we still have the state
atf_check -s exit:0 -o match:'all icmp 192.0.2.1:8 <- 192.0.2.2:.*' \
jexec alcatraz pfctl -ss
}
source_track_cleanup()
@ -60,6 +72,61 @@ source_track_cleanup()
pft_cleanup
}
atf_test_case "kill" "cleanup"
kill_head()
{
atf_set descr 'Test killing source nodes'
atf_set require.user root
}
kill_body()
{
pft_init
epair=$(vnet_mkepair)
vnet_mkjail alcatraz ${epair}b
ifconfig ${epair}a 192.0.2.2/24 up
ifconfig ${epair}a inet alias 192.0.2.3/24 up
jexec alcatraz ifconfig ${epair}b 192.0.2.1/24 up
# Enable pf!
jexec alcatraz pfctl -e
pft_set_rules alcatraz \
"pass in keep state (source-track)" \
"pass out keep state (source-track)"
# Establish two sources
atf_check -s exit:0 -o ignore \
ping -c 1 -S 192.0.2.2 192.0.2.1
atf_check -s exit:0 -o ignore \
ping -c 1 -S 192.0.2.3 192.0.2.1
# Check that both source nodes exist
atf_check -s exit:0 -o match:'192.0.2.2 -> 0.0.0.0 \( states 1,.*' \
jexec alcatraz pfctl -sS
atf_check -s exit:0 -o match:'192.0.2.3 -> 0.0.0.0 \( states 1,.*' \
jexec alcatraz pfctl -sS
jexec alcatraz pfctl -sS
# Kill the 192.0.2.2 source
jexec alcatraz pfctl -K 192.0.2.2
# The other source still exists
atf_check -s exit:0 -o match:'192.0.2.3 -> 0.0.0.0 \( states 1,.*' \
jexec alcatraz pfctl -sS
# But not the one we killed
atf_check -s exit:0 -o not-match:'192.0.2.2 -> 0.0.0.0 \( states 1,.*' \
jexec alcatraz pfctl -sS
}
kill_cleanup()
{
pft_cleanup
}
max_src_conn_rule_head()
{
@ -188,6 +255,7 @@ max_src_states_rule_cleanup()
atf_init_test_cases()
{
atf_add_test_case "source_track"
atf_add_test_case "kill"
atf_add_test_case "max_src_conn_rule"
atf_add_test_case "max_src_states_rule"
}

View File

@ -4,7 +4,18 @@ PACKAGE= kerberos
PROG= compile_et
SRCS= compile_et.c parse.y lex.l
LIBADD= roken vers
.if defined(BOOTSTRAPPING)
# compile_et does not need the full libroken, only a few of the files in it.
# The build system would otherwise pull in the full dependency chain when
# linking statically, including libcrypt, which is an ldscript and therefore
# not supported by macOS ar(1).
LIBROKEN_A= ${.OBJDIR:H:H}/kerberos5/lib/libroken/libroken.a
LDADD= ${LIBROKEN_A}
DPADD= ${LIBROKEN_A}
.else
LIBADD= roken
.endif
LIBADD+= vers
CFLAGS+=-I. -I${SRCTOP}/contrib/com_err
WARNS?= 0