From a92c4bb62ad107d867863f04937f65f23bce0837 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Thu, 22 Oct 2020 09:47:12 +0000 Subject: [PATCH] Add support for IP over infiniband, IPoIB, to lagg(4). Currently only the failover protocol is supported due to limitations in the IPoIB architecture. Refer to the lagg(4) manual page for how to configure and use this new feature. A new network interface type, IFT_INFINIBANDLAG, has been added, similar to the existing IFT_IEEE8023ADLAG . ifconfig(8) has been updated to accept a new laggtype argument when creating lagg(4) network interfaces. This new argument is used to distinguish between ethernet and infiniband type of lagg(4) network interface. The laggtype argument is optional and defaults to ethernet. The lagg(4) command line syntax is backwards compatible. Differential Revision: https://reviews.freebsd.org/D26254 Reviewed by: melifaro@ MFC after: 1 week Sponsored by: Mellanox Technologies // NVIDIA Networking --- sbin/ifconfig/ifconfig.8 | 8 +- sbin/ifconfig/iflagg.c | 31 ++++- share/man/man4/lagg.4 | 11 +- sys/net/ieee8023ad_lacp.c | 1 + sys/net/if_ethersubr.c | 6 +- sys/net/if_infiniband.c | 14 ++ sys/net/if_lagg.c | 273 ++++++++++++++++++++++++++++++++------ sys/net/if_lagg.h | 37 +++++- sys/net/if_types.h | 1 + 9 files changed, 335 insertions(+), 47 deletions(-) diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index 2978fbdb5691..da7e3f049a59 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd September 17, 2020 +.Dd October 21, 2020 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2481,6 +2481,12 @@ Set to 0 to disable. .Pp The following parameters are specific to lagg interfaces: .Bl -tag -width indent +.It Cm laggtype Ar type +When creating a lagg interface the type can be specified as either +.Cm ethernet +or +.Cm infiniband . +If not specified ethernet is the default lagg type. .It Cm laggport Ar interface Add the interface named by .Ar interface diff --git a/sbin/ifconfig/iflagg.c b/sbin/ifconfig/iflagg.c index a892ffd21eb0..63a5928a5d44 100644 --- a/sbin/ifconfig/iflagg.c +++ b/sbin/ifconfig/iflagg.c @@ -30,7 +30,11 @@ static const char rcsid[] = #include "ifconfig.h" -char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */ +static struct iflaggparam params = { + .lagg_type = LAGG_TYPE_DEFAULT, +}; + +static char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */ static void setlaggport(const char *val, int d, int s, const struct afswtch *afp) @@ -301,7 +305,31 @@ lagg_status(int s) } } +static +DECL_CMD_FUNC(setlaggtype, arg, d) +{ + static const struct lagg_types lt[] = LAGG_TYPES; + int i; + + for (i = 0; i < nitems(lt); i++) { + if (strcmp(arg, lt[i].lt_name) == 0) { + params.lagg_type = lt[i].lt_value; + return; + } + } + errx(1, "invalid lagg type: %s", arg); +} + +static void +lagg_create(int s, struct ifreq *ifr) +{ + ifr->ifr_data = (caddr_t) ¶ms; + if (ioctl(s, SIOCIFCREATE2, ifr) < 0) + err(1, "SIOCIFCREATE2"); +} + static struct cmd lagg_cmds[] = { + DEF_CLONE_CMD_ARG("laggtype", setlaggtype), DEF_CMD_ARG("laggport", setlaggport), DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), @@ -335,4 +363,5 @@ lagg_ctor(void) for (i = 0; i < nitems(lagg_cmds); i++) cmd_register(&lagg_cmds[i]); af_register(&af_lagg); + clone_setdefcallback("lagg", lagg_create); } diff --git a/share/man/man4/lagg.4 b/share/man/man4/lagg.4 index 934593551905..f7d8e1c5e1f5 100644 --- a/share/man/man4/lagg.4 +++ b/share/man/man4/lagg.4 @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 18, 2017 +.Dd October 21, 2020 .Dt LAGG 4 .Os .Sh NAME @@ -192,6 +192,15 @@ device will be used: .Pp (Note the mac address of the wireless device is forced to match the wired device as a workaround.) +.Pp +The following example shows how to create an infiniband failover interface. +.Bd -literal -offset indent +# ifconfig ib0 up +# ifconfig ib1 up +# ifconfig lagg0 create laggtype infiniband +# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e + 1.1.1.1 netmask 255.255.255.0 +.Ed .Sh SEE ALSO .Xr ng_one2many 4 , .Xr ifconfig 8 , diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index 969efcd58a36..6deecb9fff6b 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index ccf2c4383284..8731cb5b0188 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -110,7 +110,7 @@ void (*vlan_input_p)(struct ifnet *, struct mbuf *); void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ -struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); +struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; @@ -608,9 +608,9 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { - KASSERT(lagg_input_p != NULL, + KASSERT(lagg_input_ethernet_p != NULL, ("%s: if_lagg not loaded!", __func__)); - m = (*lagg_input_p)(ifp, m); + m = (*lagg_input_ethernet_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c index 7800aa54ab35..ca86724b0561 100644 --- a/sys/net/if_infiniband.c +++ b/sys/net/if_infiniband.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -58,6 +59,9 @@ __FBSDID("$FreeBSD$"); #include +/* if_lagg(4) support */ +struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); + #ifdef INET static inline void infiniband_ipv4_multicast_map(uint32_t addr, @@ -346,6 +350,16 @@ infiniband_input(struct ifnet *ifp, struct mbuf *m) /* Direct packet to correct FIB based on interface config. */ M_SETFIB(m, ifp->if_fib); + /* Handle input from a lagg port */ + if (ifp->if_type == IFT_INFINIBANDLAG) { + KASSERT(lagg_input_infiniband_p != NULL, + ("%s: if_lagg not loaded!", __func__)); + m = (*lagg_input_infiniband_p)(ifp, m); + if (__predict_false(m == NULL)) + goto done; + ifp = m->m_pkthdr.rcvif; + } + /* * Dispatch frame to upper layer. */ diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index ba9c2d2f58fe..dee4a5be6cca 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #if defined(INET) || defined(INET6) #include @@ -131,7 +132,8 @@ static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface"); static void lagg_capabilities(struct lagg_softc *); static int lagg_port_create(struct lagg_softc *, struct ifnet *); static int lagg_port_destroy(struct lagg_port *, int); -static struct mbuf *lagg_input(struct ifnet *, struct mbuf *); +static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *); +static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *); static void lagg_linkstate(struct lagg_softc *); static void lagg_port_state(struct ifnet *, int); static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); @@ -164,7 +166,8 @@ static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); -static int lagg_transmit(struct ifnet *, struct mbuf *); +static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *); +static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); @@ -327,7 +330,8 @@ lagg_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - lagg_input_p = lagg_input; + lagg_input_ethernet_p = lagg_input_ethernet; + lagg_input_infiniband_p = lagg_input_infiniband; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, lagg_port_ifdetach, NULL, @@ -336,7 +340,8 @@ lagg_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); - lagg_input_p = NULL; + lagg_input_ethernet_p = NULL; + lagg_input_infiniband_p = NULL; lagg_linkstate_p = NULL; break; default: @@ -353,6 +358,7 @@ static moduledata_t lagg_mod = { DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_lagg, 1); +MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1); static void lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) @@ -504,18 +510,48 @@ lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) static int lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { + struct iflaggparam iflp; struct lagg_softc *sc; struct ifnet *ifp; - static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ + int if_type; + int error; + static const uint8_t eaddr[LAGG_ADDR_LEN]; + static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = { + 0x00, 0xff, 0xff, 0xff, + 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff + }; + + if (params != NULL) { + error = copyin(params, &iflp, sizeof(iflp)); + if (error) + return (error); + + switch (iflp.lagg_type) { + case LAGG_TYPE_ETHERNET: + if_type = IFT_ETHER; + break; + case LAGG_TYPE_INFINIBAND: + if_type = IFT_INFINIBAND; + break; + default: + return (EINVAL); + } + } else { + if_type = IFT_ETHER; + } sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO); - ifp = sc->sc_ifp = if_alloc(IFT_ETHER); + ifp = sc->sc_ifp = if_alloc(if_type); if (ifp == NULL) { free(sc, M_LAGG); return (ENOSPC); } LAGG_SX_INIT(sc); + mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF); + callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); + LAGG_XLOCK(sc); if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; @@ -530,15 +566,25 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) CK_SLIST_INIT(&sc->sc_ports); - /* Initialise pseudo media types */ - ifmedia_init(&sc->sc_media, 0, lagg_media_change, - lagg_media_status); - ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + switch (if_type) { + case IFT_ETHER: + /* Initialise pseudo media types */ + ifmedia_init(&sc->sc_media, 0, lagg_media_change, + lagg_media_status); + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); - if_initname(ifp, laggname, unit); + if_initname(ifp, laggname, unit); + ifp->if_transmit = lagg_transmit_ethernet; + break; + case IFT_INFINIBAND: + if_initname(ifp, laggname, unit); + ifp->if_transmit = lagg_transmit_infiniband; + break; + default: + break; + } ifp->if_softc = sc; - ifp->if_transmit = lagg_transmit; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; @@ -555,9 +601,18 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) /* * Attach as an ordinary ethernet device, children will be attached - * as special device IFT_IEEE8023ADLAG. + * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG. */ - ether_ifattach(ifp, eaddr); + switch (if_type) { + case IFT_ETHER: + ether_ifattach(ifp, eaddr); + break; + case IFT_INFINIBAND: + infiniband_ifattach(ifp, eaddr, ib_bcast_addr); + break; + default: + break; + } sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); @@ -595,14 +650,24 @@ lagg_clone_destroy(struct ifnet *ifp) lagg_proto_detach(sc); LAGG_XUNLOCK(sc); - ifmedia_removeall(&sc->sc_media); - ether_ifdetach(ifp); + switch (ifp->if_type) { + case IFT_ETHER: + ifmedia_removeall(&sc->sc_media); + ether_ifdetach(ifp); + break; + case IFT_INFINIBAND: + infiniband_ifdetach(ifp); + break; + default: + break; + } if_free(ifp); LAGG_LIST_LOCK(); SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); LAGG_LIST_UNLOCK(); + mtx_destroy(&sc->sc_mtx); LAGG_SX_DESTROY(sc); free(sc, M_LAGG); } @@ -669,6 +734,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) struct lagg_port *lp, *tlp; struct ifreq ifr; int error, i, oldmtu; + int if_type; uint64_t *pval; LAGG_XLOCK_ASSERT(sc); @@ -695,9 +761,22 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (EBUSY); } - /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ - if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) - return (EPROTONOSUPPORT); + switch (sc->sc_ifp->if_type) { + case IFT_ETHER: + /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ + if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) + return (EPROTONOSUPPORT); + if_type = IFT_IEEE8023ADLAG; + break; + case IFT_INFINIBAND: + /* XXX Disallow non-infiniband interfaces */ + if (ifp->if_type != IFT_INFINIBAND) + return (EPROTONOSUPPORT); + if_type = IFT_INFINIBANDLAG; + break; + default: + break; + } /* Allow the first Ethernet member to define the MTU */ oldmtu = -1; @@ -754,14 +833,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if_ref(ifp); lp->lp_ifp = ifp; - bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN); + bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen); lp->lp_ifcapenable = ifp->if_capenable; if (CK_SLIST_EMPTY(&sc->sc_ports)) { - bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); + bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen); lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } else { - if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); + if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen); } lagg_setflags(lp, 1); @@ -770,7 +849,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) /* Change the interface type */ lp->lp_iftype = ifp->if_type; - ifp->if_type = IFT_IEEE8023ADLAG; + ifp->if_type = if_type; ifp->if_lagg = lp; lp->lp_ioctl = ifp->if_ioctl; ifp->if_ioctl = lagg_port_ioctl; @@ -887,15 +966,15 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) /* Update the primary interface */ if (lp == sc->sc_primary) { - uint8_t lladdr[ETHER_ADDR_LEN]; + uint8_t lladdr[LAGG_ADDR_LEN]; if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL) - bzero(&lladdr, ETHER_ADDR_LEN); + bzero(&lladdr, LAGG_ADDR_LEN); else - bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN); + bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN); sc->sc_primary = lp0; if (sc->sc_destroying == 0) { - bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); + bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen); lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } @@ -905,7 +984,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) * as well, to switch from old lladdr to its 'real' one) */ CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) - if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN); + if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen); } if (lp->lp_ifflags) @@ -914,7 +993,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) if (lp->lp_detaching == 0) { lagg_setflags(lp, 0); lagg_setcaps(lp, lp->lp_ifcapenable); - if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN); + if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen); } /* @@ -938,9 +1017,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) int error = 0; /* Should be checked by the caller */ - if (ifp->if_type != IFT_IEEE8023ADLAG || - (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) + switch (ifp->if_type) { + case IFT_IEEE8023ADLAG: + case IFT_INFINIBANDLAG: + if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) + goto fallback; + break; + default: goto fallback; + } switch (cmd) { case SIOCGLAGGPORT: @@ -1129,6 +1214,44 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) } +static void +lagg_watchdog_infiniband(void *arg) +{ + struct lagg_softc *sc; + struct lagg_port *lp; + struct ifnet *ifp; + struct ifnet *lp_ifp; + + sc = arg; + + /* + * Because infiniband nodes have a fixed MAC address, which is + * generated by the so-called GID, we need to regularly update + * the link level address of the parent lagg device when + * the active port changes. Possibly we could piggy-back on + * link up/down events aswell, but using a timer also provides + * a guarantee against too frequent events. This operation + * does not have to be atomic. + */ + LAGG_RLOCK(); + lp = lagg_link_active(sc, sc->sc_primary); + if (lp != NULL) { + ifp = sc->sc_ifp; + lp_ifp = lp->lp_ifp; + + if (ifp != NULL && lp_ifp != NULL && + memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) { + memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen); + CURVNET_SET(ifp->if_vnet); + EVENTHANDLER_INVOKE(iflladdr_event, ifp); + CURVNET_RESTORE(); + } + } + LAGG_RUNLOCK(); + + callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg); +} + static void lagg_init(void *xsc) { @@ -1151,12 +1274,18 @@ lagg_init(void *xsc) */ CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp), - ETHER_ADDR_LEN) != 0) - if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN); + ifp->if_addrlen) != 0) + if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen); } lagg_proto_init(sc); + if (ifp->if_type == IFT_INFINIBAND) { + mtx_lock(&sc->sc_mtx); + lagg_watchdog_infiniband(sc); + mtx_unlock(&sc->sc_mtx); + } + LAGG_XUNLOCK(sc); } @@ -1173,6 +1302,12 @@ lagg_stop(struct lagg_softc *sc) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lagg_proto_stop(sc); + + mtx_lock(&sc->sc_mtx); + callout_stop(&sc->sc_watchdog); + mtx_unlock(&sc->sc_mtx); + + callout_drain(&sc->sc_watchdog); } static int @@ -1228,7 +1363,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EPROTONOSUPPORT; break; } - + /* Infiniband only supports the failover protocol. */ + if (ra->ra_proto != LAGG_PROTO_FAILOVER && + ifp->if_type == IFT_INFINIBAND) { + error = EPROTONOSUPPORT; + break; + } LAGG_XLOCK(sc); lagg_proto_detach(sc); LAGG_UNLOCK_ASSERT(); @@ -1546,7 +1686,10 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: - error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); + if (ifp->if_type == IFT_INFINIBAND) + error = EINVAL; + else + error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; case SIOCSIFCAP: @@ -1855,7 +1998,7 @@ lagg_setflags(struct lagg_port *lp, int status) } static int -lagg_transmit(struct ifnet *ifp, struct mbuf *m) +lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error; @@ -1880,6 +2023,32 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m) return (error); } +static int +lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m) +{ + struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; + int error; + +#if defined(KERN_TLS) || defined(RATELIMIT) + if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) + MPASS(m->m_pkthdr.snd_tag->ifp == ifp); +#endif + LAGG_RLOCK(); + /* We need a Tx algorithm and at least one port */ + if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { + LAGG_RUNLOCK(); + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (ENXIO); + } + + INFINIBAND_BPF_MTAP(ifp, m); + + error = lagg_proto_start(sc, m); + LAGG_RUNLOCK(); + return (error); +} + /* * The ifp->if_qflush entry point for lagg(4) is no-op. */ @@ -1889,7 +2058,7 @@ lagg_qflush(struct ifnet *ifp __unused) } static struct mbuf * -lagg_input(struct ifnet *ifp, struct mbuf *m) +lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m) { struct lagg_port *lp = ifp->if_lagg; struct lagg_softc *sc = lp->lp_softc; @@ -1916,6 +2085,34 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) return (m); } +static struct mbuf * +lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m) +{ + struct lagg_port *lp = ifp->if_lagg; + struct lagg_softc *sc = lp->lp_softc; + struct ifnet *scifp = sc->sc_ifp; + + LAGG_RLOCK(); + if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + lp->lp_detaching != 0 || + sc->sc_proto == LAGG_PROTO_NONE) { + LAGG_RUNLOCK(); + m_freem(m); + return (NULL); + } + + INFINIBAND_BPF_MTAP(scifp, m); + + m = lagg_proto_input(sc, lp, m); + if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { + m_freem(m); + m = NULL; + } + + LAGG_RUNLOCK(); + return (m); +} + static int lagg_media_change(struct ifnet *ifp) { diff --git a/sys/net/if_lagg.h b/sys/net/if_lagg.h index 738bfdcecf09..fc3e782d4444 100644 --- a/sys/net/if_lagg.h +++ b/sys/net/if_lagg.h @@ -72,6 +72,33 @@ struct lagg_protos { { "default", LAGG_PROTO_DEFAULT } \ } +/* Supported lagg TYPEs */ +typedef enum { + LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */ + LAGG_TYPE_INFINIBAND, /* infiniband */ + LAGG_TYPE_MAX, +} lagg_type; + +struct lagg_types { + const char *lt_name; + lagg_type lt_value; +}; + +#define LAGG_TYPE_DEFAULT LAGG_TYPE_ETHERNET +#define LAGG_TYPES { \ + { "ethernet", LAGG_TYPE_ETHERNET }, \ + { "infiniband", LAGG_TYPE_INFINIBAND }, \ +} + +/* + * lagg create clone params + */ +struct iflaggparam { + uint8_t lagg_type; /* see LAGG_TYPE_XXX */ + uint8_t reserved_8[3]; + uint32_t reserved_32[3]; +}; + /* * lagg ioctls. */ @@ -206,7 +233,7 @@ struct lagg_counters { struct lagg_softc { struct ifnet *sc_ifp; /* virtual interface */ - struct rmlock sc_mtx; + struct mtx sc_mtx; /* watchdog mutex */ struct sx sc_sx; int sc_proto; /* lagg protocol */ u_int sc_count; /* number of ports */ @@ -230,12 +257,15 @@ struct lagg_softc { u_int sc_opts; int flowid_shift; /* shift the flowid */ struct lagg_counters detached_counters; /* detached ports sum */ + struct callout sc_watchdog; /* watchdog timer */ }; struct lagg_port { struct ifnet *lp_ifp; /* physical interface */ struct lagg_softc *lp_softc; /* parent lagg */ - uint8_t lp_lladdr[ETHER_ADDR_LEN]; +#define LAGG_ADDR_LEN \ + MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN) + uint8_t lp_lladdr[LAGG_ADDR_LEN]; u_char lp_iftype; /* interface type */ uint32_t lp_prio; /* port priority */ @@ -257,7 +287,8 @@ struct lagg_port { struct epoch_context lp_epoch_ctx; }; -extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); +extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); +extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int ); int lagg_enqueue(struct ifnet *, struct mbuf *); diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 61c432ba890f..1103d5f90928 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -242,6 +242,7 @@ typedef enum { IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */ IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */ IFT_INFINIBAND = 0xc7, /* Infiniband */ + IFT_INFINIBANDLAG = 0xc8, /* Infiniband Link Aggregate */ IFT_BRIDGE = 0xd1, /* Transparent bridge interface */ IFT_STF = 0xd7, /* 6to4 interface */