src/sys/net/ifq.h

/* $OpenBSD: ifq.h,v 1.41 2023/11/10 15:51:24 bluhm Exp $ */
/*
* Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef _NET_IFQ_H_
#define _NET_IFQ_H_
struct ifnet;
struct kstat;
struct ifq_ops;
struct ifqueue {
	struct ifnet		*ifq_if;
	struct taskq		*ifq_softnet;
	union {
		void			*_ifq_softc;
		/*
		 * a ring's sndq is found by looking up an array of pointers.
		 * by default we only have one sndq and the default drivers
		 * don't use ifq_softc, so we can borrow it for the map until
		 * we need to allocate a proper map.
		 */
		struct ifqueue		*_ifq_ifqs[1];
	} _ifq_ptr;
#define ifq_softc	_ifq_ptr._ifq_softc
#define ifq_ifqs	_ifq_ptr._ifq_ifqs

	/* mbuf handling */
	struct mutex		 ifq_mtx;
	const struct ifq_ops	*ifq_ops;
	void			*ifq_q;
	struct mbuf_list	 ifq_free;
	unsigned int		 ifq_len;
	unsigned int		 ifq_oactive;

	/* statistics */
	uint64_t		 ifq_packets;
	uint64_t		 ifq_bytes;
	uint64_t		 ifq_qdrops;
	uint64_t		 ifq_errors;
	uint64_t		 ifq_mcasts;
	uint32_t		 ifq_oactives;

	struct kstat		*ifq_kstat;

	/* work serialisation */
	struct mutex		 ifq_task_mtx;
	struct task_list	 ifq_task_list;
	void			*ifq_serializer;
	struct task		 ifq_bundle;

	/* work to be serialised */
	struct task		 ifq_start;
	struct task		 ifq_restart;

	/* properties */
	unsigned int		 ifq_maxlen;
	unsigned int		 ifq_idx;
};
struct ifiqueue {
	struct ifnet		*ifiq_if;
	struct taskq		*ifiq_softnet;
	union {
		void			*_ifiq_softc;
		struct ifiqueue		*_ifiq_ifiqs[1];
	} _ifiq_ptr;
#define ifiq_softc	_ifiq_ptr._ifiq_softc
#define ifiq_ifiqs	_ifiq_ptr._ifiq_ifiqs

	struct mutex		 ifiq_mtx;
	struct mbuf_list	 ifiq_ml;
	struct task		 ifiq_task;
	unsigned int		 ifiq_pressure;

	/* counters */
	uint64_t		 ifiq_packets;
	uint64_t		 ifiq_bytes;
	uint64_t		 ifiq_fdrops;
	uint64_t		 ifiq_qdrops;
	uint64_t		 ifiq_errors;
	uint64_t		 ifiq_mcasts;
	uint64_t		 ifiq_noproto;

	/* number of times a list of packets was put on ifiq_ml */
	uint64_t		 ifiq_enqueues;
	/* number of times a list of packets was pulled off ifiq_ml */
	uint64_t		 ifiq_dequeues;

	struct kstat		*ifiq_kstat;

	/* properties */
	unsigned int		 ifiq_idx;
};
#ifdef _KERNEL
#define IFQ_MAXLEN 256
/*
*
* Interface Send Queues
*
 * struct ifqueue sits between the network stack and a driver's
* transmission of packets. The high level view is that when the stack
* has finished generating a packet it hands it to a driver for
* transmission. It does this by queueing the packet on an ifqueue and
* notifying the driver to start transmission of the queued packets.
*
* A network device may have multiple contexts for the transmission
* of packets, ie, independent transmit rings. Such a network device,
* represented by a struct ifnet, would then have multiple ifqueue
* structures, each of which maps to an independent transmit ring.
*
* struct ifqueue also provides the point where conditioning of
* traffic (ie, priq and hfsc) is implemented, and provides some
* infrastructure to assist in the implementation of network drivers.
*
* = ifq API
*
* The ifq API provides functions for three distinct consumers:
*
* 1. The network stack
* 2. Traffic QoS/conditioning implementations
* 3. Network drivers
*
* == Network Stack API
*
* The network stack is responsible for initialising and destroying
* the ifqueue structures, changing the traffic conditioner on an
* interface, enqueuing packets for transmission, and notifying
* the driver to start transmission of a particular ifqueue.
*
* === ifq_init()
*
 * During if_attach(), the network stack calls ifq_init() to initialise
* the ifqueue structure. By default it configures the priq traffic
* conditioner.
*
* === ifq_destroy()
*
 * The network stack calls ifq_destroy() during if_detach() to tear down
* the ifqueue structure. It frees the traffic conditioner state, and
* frees any mbufs that were left queued.
*
* === ifq_attach()
*
* ifq_attach() is used to replace the current traffic conditioner on
 * the ifqueue. All pending mbufs are removed from the previous
 * conditioner and requeued on the new one.
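 *
 * For example, reattaching the stock priq conditioner could look like
 * this (a sketch; priq ignores the opaque allocation argument, so NULL
 * is passed here):
 *
 *	ifq_attach(ifq, ifq_priq_ops, NULL);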
*
* === ifq_idx()
*
* ifq_idx() selects a specific ifqueue from the current ifnet
* structure for use in the transmission of the mbuf.
*
* === ifq_enqueue()
*
* ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
* current traffic conditioner may drop a packet to make space on the
* queue.
*
* === ifq_start()
*
* Once a packet has been successfully queued with ifq_enqueue(),
* the network card is notified with a call to ifq_start().
* Calls to ifq_start() run in the ifqueue serialisation context,
* guaranteeing that only one instance of ifp->if_qstart() will be
* running on behalf of a specific ifqueue in the system at any point
* in time.
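 *
 * A minimal sketch of the stack-side transmit path built from these
 * calls (drv_output() is hypothetical; the real path is if_enqueue()
 * and friends in if.c):
 *
 *	int
 *	drv_output(struct ifnet *ifp, struct mbuf *m)
 *	{
 *		struct ifqueue *ifq;
 *		int error;
 *
 *		// pick a transmit queue for this packet
 *		ifq = ifp->if_ifqs[ifq_idx(&ifp->if_snd, ifp->if_nifqs, m)];
 *
 *		// the conditioner may drop this or another packet
 *		// to make space on the queue
 *		error = ifq_enqueue(ifq, m);
 *		if (error != 0)
 *			return (error);
 *
 *		// kick the driver in the ifq serialisation context
 *		ifq_start(ifq);
 *		return (0);
 *	}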
*
* == Traffic conditioners API
*
* The majority of interaction between struct ifqueue and a traffic
* conditioner occurs via the callbacks a traffic conditioner provides
* in an instance of struct ifq_ops.
*
* XXX document ifqop_*
*
* The ifqueue API implements the locking on behalf of the conditioning
* implementations so conditioners only have to reject or keep mbufs.
 * If something needs to inspect a conditioner's internals, the queue lock
* needs to be taken to allow for a consistent or safe view. The queue
* lock may be taken and released with ifq_q_enter() and ifq_q_leave().
*
* === ifq_q_enter()
*
 * Code wishing to access a conditioner's internals may take the queue
 * lock with ifq_q_enter(). The caller must pass a reference to the
 * conditioner's ifq_ops structure so the infrastructure can ensure the
 * caller is able to understand the internals. ifq_q_enter() returns
 * a pointer to the conditioner's internal structures, or NULL if the
* ifq_ops did not match the current conditioner.
*
* === ifq_q_leave()
*
* The queue lock acquired with ifq_q_enter() is released with
* ifq_q_leave().
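 *
 * A hedged sketch of looking inside the default priq conditioner
 * (struct priq is private to ifq.c, so this assumes code with a view
 * of its internals):
 *
 *	struct priq *pq;
 *
 *	pq = ifq_q_enter(ifq, ifq_priq_ops);
 *	if (pq != NULL) {
 *		// pq may be inspected safely while the queue
 *		// lock is held
 *		ifq_q_leave(ifq, pq);
 *	}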
*
* === ifq_mfreem() and ifq_mfreeml()
*
* A goal of the API is to avoid freeing an mbuf while mutexes are
* held. Because the ifq API manages the lock on behalf of the backend
* ifqops, the backend should not directly free mbufs. If a conditioner
* backend needs to drop a packet during the handling of ifqop_deq_begin,
 * it may free it by calling ifq_mfreem(). This accounts for the drop
 * and schedules the mbuf to be freed after ifq_mtx has been released.
* ifq_mfreeml() takes an mbuf list as an argument instead.
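 *
 * For example, a conditioner's deq_begin handler could shed packets it
 * no longer wants like this (a sketch: the backend state is assumed to
 * be a plain mbuf_list, and PKT_IS_STALE() is a hypothetical policy
 * check):
 *
 *	struct mbuf *
 *	cond_deq_begin(struct ifqueue *ifq, void **cookiep)
 *	{
 *		struct mbuf_list *ml = ifq->ifq_q;
 *		struct mbuf *m;
 *
 *		while ((m = MBUF_LIST_FIRST(ml)) != NULL) {
 *			if (!PKT_IS_STALE(m))
 *				break;
 *
 *			// accounts the drop and defers the actual
 *			// m_freem() until ifq_mtx is released
 *			ifq_mfreem(ifq, ml_dequeue(ml));
 *		}
 *
 *		return (m);
 *	}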
*
*
* == Network Driver API
*
* The API used by network drivers is mostly documented in the
* ifq_dequeue(9) manpage except for ifq_serialize().
*
* === ifq_serialize()
*
* A driver may run arbitrary work in the ifqueue serialiser context
* via ifq_serialize(). The work to be done is represented by a task
 * that has been prepared with task_set().
*
* The work will be run in series with any other work dispatched by
* ifq_start(), ifq_restart(), or other ifq_serialize() calls.
*
* Because the work may be run on another CPU, the lifetime of the
* task and the work it represents can extend beyond the end of the
* call to ifq_serialize() that dispatched it.
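 *
 * A minimal sketch, assuming sc_deferred is a struct task embedded in
 * the driver softc and drv_deferred() is work that must not run
 * concurrently with the start routine:
 *
 *	void
 *	drv_deferred(void *arg)
 *	{
 *		struct drv_softc *sc = arg;
 *
 *		// runs in series with drv_start() for this ifq
 *	}
 *
 *	task_set(&sc->sc_deferred, drv_deferred, sc);
 *	ifq_serialize(ifq, &sc->sc_deferred);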
*
*
* = ifqueue work serialisation
*
* ifqueues provide a mechanism to dispatch work to be run in a single
* context. Work in this mechanism is represented by task structures.
*
* The tasks are run in a context similar to a taskq serviced by a
* single kernel thread, except the work is run immediately by the
 * first CPU that dispatches work. If a second CPU attempts to dispatch
 * additional tasks while the first is still running, they will be
 * queued to be run by the first CPU, and the second CPU will return
 * immediately.
*
* = MP Safe Network Drivers
*
 * An MP safe network driver is one whose start routine can be
 * called by the network stack without holding the big kernel lock.
*
* == Attach
*
* A driver advertises its ability to run its start routine without
* the kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
* before calling if_attach(). Advertising an MPSAFE start routine
* also implies that the driver understands that a network card can
 * have multiple rings or transmit queues, and therefore provides an
 * if_qstart function (which takes an ifqueue pointer) instead of an
* if_start function (which takes an ifnet pointer).
*
 * If the hardware supports multiple transmit rings, the driver
 * advertises this to the network stack with if_attach_queues()
 * after the call to if_attach(). if_attach_queues() allocates a struct
* ifqueue for each hardware ring, which can then be initialised by
* the driver with data for each ring.
*
 *	void	drv_start(struct ifqueue *);
 *
 *	void
 *	drv_attach()
 *	{
 *		...
 *		ifp->if_xflags = IFXF_MPSAFE;
 *		ifp->if_qstart = drv_start;
 *		if_attach(ifp);
 *
 *		if_attach_queues(ifp, DRV_NUM_TX_RINGS);
 *		for (i = 0; i < DRV_NUM_TX_RINGS; i++) {
 *			struct ifqueue *ifq = ifp->if_ifqs[i];
 *			struct drv_tx_ring *ring = &sc->sc_tx_rings[i];
 *
 *			ifq->ifq_softc = ring;
 *			ring->ifq = ifq;
 *		}
 *	}
*
* The network stack will then call ifp->if_qstart via ifq_start()
* to guarantee there is only one instance of that function running
* for each ifq in the system, and to serialise it with other work
* the driver may provide.
*
* == Initialise
*
* When the stack requests an interface be brought up (ie, drv_ioctl()
* is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
* drivers should set IFF_RUNNING in ifp->if_flags, and then call
* ifq_clr_oactive() against each ifq.
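 *
 * A minimal sketch of that up path (drv_up() and HW_INIT() are
 * hypothetical):
 *
 *	void
 *	drv_up(struct drv_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *		int i;
 *
 *		HW_INIT();
 *		SET(ifp->if_flags, IFF_RUNNING);
 *
 *		for (i = 0; i < sc->sc_num_queues; i++)
 *			ifq_clr_oactive(ifp->if_ifqs[i]);
 *	}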
*
* == if_start
*
* ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
* ifq_is_oactive() does not return true, and that there are pending
* packets to transmit via a call to ifq_len(). Therefore, drivers are
* no longer responsible for doing this themselves.
*
 * A driver that should not transmit packets while its link is down can
 * use ifq_purge() to flush pending packets from the transmit queue.
*
* Drivers for hardware should use the following pattern to transmit
* packets:
*
 *	void
 *	drv_start(struct ifqueue *ifq)
 *	{
 *		struct drv_tx_ring *ring = ifq->ifq_softc;
 *		struct ifnet *ifp = ifq->ifq_if;
 *		struct drv_softc *sc = ifp->if_softc;
 *		struct mbuf *m;
 *		int kick = 0;
 *
 *		if (NO_LINK) {
 *			ifq_purge(ifq);
 *			return;
 *		}
 *
 *		for (;;) {
 *			if (NO_SPACE(ring)) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_dequeue(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (drv_encap(sc, ring, m) != 0) { // map and fill ring
 *				m_freem(m);
 *				continue;
 *			}
 *
 *			if (ifp->if_bpf != NULL)
 *				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 *
 *			kick = 1; // at least one packet went on the ring
 *		}
 *
 *		if (kick)
 *			drv_kick(ring); // notify hw of new descriptors
 *	}
*
* == Transmission completion
*
* The following pattern should be used for transmit queue interrupt
* processing:
*
 *	void
 *	drv_txeof(struct drv_tx_ring *ring)
 *	{
 *		struct ifqueue *ifq = ring->ifq;
 *
 *		while (COMPLETED_PKTS(ring)) {
 *			// unmap packets, m_freem() the mbufs.
 *		}
 *
 *		if (ifq_is_oactive(ifq))
 *			ifq_restart(ifq);
 *	}
*
* == Stop
*
* Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
* should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
* routine is not running before freeing any resources it uses:
*
 *	void
 *	drv_down(struct drv_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *		struct ifqueue *ifq;
 *		int i;
 *
 *		CLR(ifp->if_flags, IFF_RUNNING);
 *		DISABLE_INTERRUPTS();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_barrier(ifq);
 *		}
 *
 *		intr_barrier(sc->sc_ih);
 *
 *		FREE_RESOURCES();
 *
 *		for (i = 0; i < sc->sc_num_queues; i++) {
 *			ifq = ifp->if_ifqs[i];
 *			ifq_clr_oactive(ifq);
 *		}
 *	}
*
*/
struct ifq_ops {
	unsigned int		 (*ifqop_idx)(unsigned int,
				    const struct mbuf *);
	struct mbuf		*(*ifqop_enq)(struct ifqueue *, struct mbuf *);
	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *, void **);
	void			 (*ifqop_deq_commit)(struct ifqueue *,
				    struct mbuf *, void *);
	void			 (*ifqop_purge)(struct ifqueue *,
				    struct mbuf_list *);
	void			*(*ifqop_alloc)(unsigned int, void *);
	void			 (*ifqop_free)(unsigned int, void *);
};
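
/*
 * A hedged sketch of a minimal FIFO conditioner built on the callbacks
 * above. All fifo_* names are hypothetical; the canonical in-tree
 * example is the priq implementation in ifq.c.
 *
 *	static unsigned int
 *	fifo_idx(unsigned int nifqs, const struct mbuf *m)
 *	{
 *		return (0);		// always map to the first txq
 *	}
 *
 *	static void *
 *	fifo_alloc(unsigned int idx, void *arg)
 *	{
 *		struct mbuf_list *ml;
 *
 *		ml = malloc(sizeof(*ml), M_DEVBUF, M_WAITOK);
 *		ml_init(ml);
 *		return (ml);		// becomes ifq->ifq_q
 *	}
 *
 *	static void
 *	fifo_free(unsigned int idx, void *q)
 *	{
 *		free(q, M_DEVBUF, sizeof(struct mbuf_list));
 *	}
 *
 *	static struct mbuf *
 *	fifo_enq(struct ifqueue *ifq, struct mbuf *m)
 *	{
 *		struct mbuf_list *ml = ifq->ifq_q;
 *
 *		// returning m rejects it; the ifq layer accounts the
 *		// qdrop and frees the mbuf outside ifq_mtx
 *		if (ml_len(ml) >= ifq->ifq_maxlen)
 *			return (m);
 *
 *		ml_enqueue(ml, m);
 *		return (NULL);		// accepted, nothing dropped
 *	}
 *
 *	static struct mbuf *
 *	fifo_deq_begin(struct ifqueue *ifq, void **cookiep)
 *	{
 *		struct mbuf_list *ml = ifq->ifq_q;
 *
 *		return (MBUF_LIST_FIRST(ml));	// peek, do not remove
 *	}
 *
 *	static void
 *	fifo_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
 *	{
 *		struct mbuf_list *ml = ifq->ifq_q;
 *
 *		ml_dequeue(ml);		// commit: remove the head
 *	}
 *
 *	static void
 *	fifo_purge(struct ifqueue *ifq, struct mbuf_list *free_ml)
 *	{
 *		struct mbuf_list *ml = ifq->ifq_q;
 *
 *		ml_enlist(free_ml, ml);	// hand every mbuf back for freeing
 *	}
 *
 *	static const struct ifq_ops fifo_ops = {
 *		fifo_idx,
 *		fifo_enq,
 *		fifo_deq_begin,
 *		fifo_deq_commit,
 *		fifo_purge,
 *		fifo_alloc,
 *		fifo_free,
 *	};
 */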
extern const struct ifq_ops * const ifq_priq_ops;
/*
* Interface send queues.
*/
void		 ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
void		 ifq_destroy(struct ifqueue *);
void		 ifq_add_data(struct ifqueue *, struct if_data *);
int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
void		 ifq_start(struct ifqueue *);
struct mbuf	*ifq_deq_begin(struct ifqueue *);
void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf	*ifq_dequeue(struct ifqueue *);
int		 ifq_hdatalen(struct ifqueue *);
void		 ifq_init_maxlen(struct ifqueue *, unsigned int);
void		 ifq_mfreem(struct ifqueue *, struct mbuf *);
void		 ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int	 ifq_purge(struct ifqueue *);
void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void		 ifq_q_leave(struct ifqueue *, void *);
void		 ifq_serialize(struct ifqueue *, struct task *);
void		 ifq_barrier(struct ifqueue *);
void		 ifq_set_oactive(struct ifqueue *);
int		 ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
		     const char *, volatile unsigned int *,
		     volatile unsigned int *);

#define ifq_len(_ifq)		READ_ONCE((_ifq)->ifq_len)
#define ifq_empty(_ifq)		(ifq_len(_ifq) == 0)
static inline int
ifq_is_priq(struct ifqueue *ifq)
{
	return (ifq->ifq_ops == ifq_priq_ops);
}

static inline void
ifq_clr_oactive(struct ifqueue *ifq)
{
	ifq->ifq_oactive = 0;
}

static inline unsigned int
ifq_is_oactive(struct ifqueue *ifq)
{
	return (ifq->ifq_oactive);
}

static inline void
ifq_restart(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_restart);
}

static inline unsigned int
ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
{
	return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
}
/* ifiq */
void		 ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
void		 ifiq_destroy(struct ifiqueue *);
int		 ifiq_input(struct ifiqueue *, struct mbuf_list *);
int		 ifiq_enqueue(struct ifiqueue *, struct mbuf *);
void		 ifiq_add_data(struct ifiqueue *, struct if_data *);

#define ifiq_len(_ifiq)		READ_ONCE(ml_len(&(_ifiq)->ifiq_ml))
#define ifiq_empty(_ifiq)	(ifiq_len(_ifiq) == 0)
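
/*
 * A hedged sketch of per-queue rx delivery via ifiq_input()
 * (drv_rxeof() and the DRV_* macros are hypothetical):
 *
 *	void
 *	drv_rxeof(struct drv_rx_ring *ring)
 *	{
 *		struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *		struct mbuf *m;
 *
 *		while ((m = DRV_NEXT_COMPLETED_PKT(ring)) != NULL)
 *			ml_enqueue(&ml, m);
 *
 *		// a non-zero return signals pressure from the stack;
 *		// drivers may use it to slow the ring down
 *		if (ifiq_input(ring->ifiq, &ml))
 *			DRV_SLOW_DOWN(ring);
 *	}
 */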
#endif /* _KERNEL */
#endif /* _NET_IFQ_H_ */