diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index 2978fbdb5691..da7e3f049a59 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd September 17, 2020 +.Dd October 21, 2020 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2481,6 +2481,12 @@ Set to 0 to disable. .Pp The following parameters are specific to lagg interfaces: .Bl -tag -width indent +.It Cm laggtype Ar type +When creating a lagg interface the type can be specified as either +.Cm ethernet +or +.Cm infiniband . +If not specified ethernet is the default lagg type. .It Cm laggport Ar interface Add the interface named by .Ar interface diff --git a/sbin/ifconfig/iflagg.c b/sbin/ifconfig/iflagg.c index a892ffd21eb0..63a5928a5d44 100644 --- a/sbin/ifconfig/iflagg.c +++ b/sbin/ifconfig/iflagg.c @@ -30,7 +30,11 @@ static const char rcsid[] = #include "ifconfig.h" -char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */ +static struct iflaggparam params = { + .lagg_type = LAGG_TYPE_DEFAULT, +}; + +static char lacpbuf[120]; /* LACP peer '[(a,a,a),(p,p,p)]' */ static void setlaggport(const char *val, int d, int s, const struct afswtch *afp) @@ -301,7 +305,31 @@ lagg_status(int s) } } +static +DECL_CMD_FUNC(setlaggtype, arg, d) +{ + static const struct lagg_types lt[] = LAGG_TYPES; + int i; + + for (i = 0; i < nitems(lt); i++) { + if (strcmp(arg, lt[i].lt_name) == 0) { + params.lagg_type = lt[i].lt_value; + return; + } + } + errx(1, "invalid lagg type: %s", arg); +} + +static void +lagg_create(int s, struct ifreq *ifr) +{ + ifr->ifr_data = (caddr_t) ¶ms; + if (ioctl(s, SIOCIFCREATE2, ifr) < 0) + err(1, "SIOCIFCREATE2"); +} + static struct cmd lagg_cmds[] = { + DEF_CLONE_CMD_ARG("laggtype", setlaggtype), DEF_CMD_ARG("laggport", setlaggport), DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), @@ -335,4 +363,5 @@ lagg_ctor(void) for (i = 0; i < nitems(lagg_cmds); i++) cmd_register(&lagg_cmds[i]); af_register(&af_lagg); + clone_setdefcallback("lagg", lagg_create); } diff --git a/share/man/man4/lagg.4 b/share/man/man4/lagg.4 index 934593551905..f7d8e1c5e1f5 100644 --- a/share/man/man4/lagg.4 +++ b/share/man/man4/lagg.4 @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 18, 2017 +.Dd October 21, 2020 .Dt LAGG 4 .Os .Sh NAME @@ -192,6 +192,15 @@ device will be used: .Pp (Note the mac address of the wireless device is forced to match the wired device as a workaround.) +.Pp +The following example shows how to create an infiniband failover interface. +.Bd -literal -offset indent +# ifconfig ib0 up +# ifconfig ib1 up +# ifconfig lagg0 create laggtype infiniband +# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e + 1.1.1.1 netmask 255.255.255.0 +.Ed .Sh SEE ALSO .Xr ng_one2many 4 , .Xr ifconfig 8 , diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index 969efcd58a36..6deecb9fff6b 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index ccf2c4383284..8731cb5b0188 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -110,7 +110,7 @@ void (*vlan_input_p)(struct ifnet *, struct mbuf *); void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ -struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); +struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; @@ -608,9 +608,9 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { - KASSERT(lagg_input_p != NULL, + KASSERT(lagg_input_ethernet_p != NULL, ("%s: if_lagg not loaded!", __func__)); - m = (*lagg_input_p)(ifp, m); + m = (*lagg_input_ethernet_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c index 7800aa54ab35..ca86724b0561 100644 --- a/sys/net/if_infiniband.c +++ b/sys/net/if_infiniband.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -58,6 +59,9 @@ __FBSDID("$FreeBSD$"); #include +/* if_lagg(4) support */ +struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); + #ifdef INET static inline void infiniband_ipv4_multicast_map(uint32_t addr, @@ -346,6 +350,16 @@ infiniband_input(struct ifnet *ifp, struct mbuf *m) /* Direct packet to correct FIB based on interface config. */ M_SETFIB(m, ifp->if_fib); + /* Handle input from a lagg port */ + if (ifp->if_type == IFT_INFINIBANDLAG) { + KASSERT(lagg_input_infiniband_p != NULL, + ("%s: if_lagg not loaded!", __func__)); + m = (*lagg_input_infiniband_p)(ifp, m); + if (__predict_false(m == NULL)) + goto done; + ifp = m->m_pkthdr.rcvif; + } + /* * Dispatch frame to upper layer. */ diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index ba9c2d2f58fe..dee4a5be6cca 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #if defined(INET) || defined(INET6) #include @@ -131,7 +132,8 @@ static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface"); static void lagg_capabilities(struct lagg_softc *); static int lagg_port_create(struct lagg_softc *, struct ifnet *); static int lagg_port_destroy(struct lagg_port *, int); -static struct mbuf *lagg_input(struct ifnet *, struct mbuf *); +static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *); +static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *); static void lagg_linkstate(struct lagg_softc *); static void lagg_port_state(struct ifnet *, int); static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); @@ -164,7 +166,8 @@ static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); -static int lagg_transmit(struct ifnet *, struct mbuf *); +static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *); +static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); @@ -327,7 +330,8 @@ lagg_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - lagg_input_p = lagg_input; + lagg_input_ethernet_p = lagg_input_ethernet; + lagg_input_infiniband_p = lagg_input_infiniband; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, lagg_port_ifdetach, NULL, @@ -336,7 +340,8 @@ lagg_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); - lagg_input_p = NULL; + lagg_input_ethernet_p = NULL; + lagg_input_infiniband_p = NULL; lagg_linkstate_p = NULL; break; default: @@ -353,6 +358,7 @@ static moduledata_t lagg_mod = { DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_lagg, 1); +MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1); static void lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) @@ -504,18 +510,48 @@ lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) static int lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { + struct iflaggparam iflp; struct lagg_softc *sc; struct ifnet *ifp; - static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ + int if_type; + int error; + static const uint8_t eaddr[LAGG_ADDR_LEN]; + static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = { + 0x00, 0xff, 0xff, 0xff, + 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff + }; + + if (params != NULL) { + error = copyin(params, &iflp, sizeof(iflp)); + if (error) + return (error); + + switch (iflp.lagg_type) { + case LAGG_TYPE_ETHERNET: + if_type = IFT_ETHER; + break; + case LAGG_TYPE_INFINIBAND: + if_type = IFT_INFINIBAND; + break; + default: + return (EINVAL); + } + } else { + if_type = IFT_ETHER; + } sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO); - ifp = sc->sc_ifp = if_alloc(IFT_ETHER); + ifp = sc->sc_ifp = if_alloc(if_type); if (ifp == NULL) { free(sc, M_LAGG); return (ENOSPC); } LAGG_SX_INIT(sc); + mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF); + callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); + LAGG_XLOCK(sc); if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; @@ -530,15 +566,25 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) CK_SLIST_INIT(&sc->sc_ports); - /* Initialise pseudo media types */ - ifmedia_init(&sc->sc_media, 0, lagg_media_change, - lagg_media_status); - ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + switch (if_type) { + case IFT_ETHER: + /* Initialise pseudo media types */ + ifmedia_init(&sc->sc_media, 0, lagg_media_change, + lagg_media_status); + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); - if_initname(ifp, laggname, unit); + if_initname(ifp, laggname, unit); + ifp->if_transmit = lagg_transmit_ethernet; + break; + case IFT_INFINIBAND: + if_initname(ifp, laggname, unit); + ifp->if_transmit = lagg_transmit_infiniband; + break; + default: + break; + } ifp->if_softc = sc; - ifp->if_transmit = lagg_transmit; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; @@ -555,9 +601,18 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) /* * Attach as an ordinary ethernet device, children will be attached - * as special device IFT_IEEE8023ADLAG. + * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG. */ - ether_ifattach(ifp, eaddr); + switch (if_type) { + case IFT_ETHER: + ether_ifattach(ifp, eaddr); + break; + case IFT_INFINIBAND: + infiniband_ifattach(ifp, eaddr, ib_bcast_addr); + break; + default: + break; + } sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); @@ -595,14 +650,24 @@ lagg_clone_destroy(struct ifnet *ifp) lagg_proto_detach(sc); LAGG_XUNLOCK(sc); - ifmedia_removeall(&sc->sc_media); - ether_ifdetach(ifp); + switch (ifp->if_type) { + case IFT_ETHER: + ifmedia_removeall(&sc->sc_media); + ether_ifdetach(ifp); + break; + case IFT_INFINIBAND: + infiniband_ifdetach(ifp); + break; + default: + break; + } if_free(ifp); LAGG_LIST_LOCK(); SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); LAGG_LIST_UNLOCK(); + mtx_destroy(&sc->sc_mtx); LAGG_SX_DESTROY(sc); free(sc, M_LAGG); } @@ -669,6 +734,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) struct lagg_port *lp, *tlp; struct ifreq ifr; int error, i, oldmtu; + int if_type; uint64_t *pval; LAGG_XLOCK_ASSERT(sc); @@ -695,9 +761,22 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (EBUSY); } - /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ - if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) - return (EPROTONOSUPPORT); + switch (sc->sc_ifp->if_type) { + case IFT_ETHER: + /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ + if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) + return (EPROTONOSUPPORT); + if_type = IFT_IEEE8023ADLAG; + break; + case IFT_INFINIBAND: + /* XXX Disallow non-infiniband interfaces */ + if (ifp->if_type != IFT_INFINIBAND) + return (EPROTONOSUPPORT); + if_type = IFT_INFINIBANDLAG; + break; + default: + break; + } /* Allow the first Ethernet member to define the MTU */ oldmtu = -1; @@ -754,14 +833,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if_ref(ifp); lp->lp_ifp = ifp; - bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN); + bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen); lp->lp_ifcapenable = ifp->if_capenable; if (CK_SLIST_EMPTY(&sc->sc_ports)) { - bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); + bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen); lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } else { - if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); + if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen); } lagg_setflags(lp, 1); @@ -770,7 +849,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) /* Change the interface type */ lp->lp_iftype = ifp->if_type; - ifp->if_type = IFT_IEEE8023ADLAG; + ifp->if_type = if_type; ifp->if_lagg = lp; lp->lp_ioctl = ifp->if_ioctl; ifp->if_ioctl = lagg_port_ioctl; @@ -887,15 +966,15 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) /* Update the primary interface */ if (lp == sc->sc_primary) { - uint8_t lladdr[ETHER_ADDR_LEN]; + uint8_t lladdr[LAGG_ADDR_LEN]; if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL) - bzero(&lladdr, ETHER_ADDR_LEN); + bzero(&lladdr, LAGG_ADDR_LEN); else - bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN); + bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN); sc->sc_primary = lp0; if (sc->sc_destroying == 0) { - bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); + bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen); lagg_proto_lladdr(sc); EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); } @@ -905,7 +984,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) * as well, to switch from old lladdr to its 'real' one) */ CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) - if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN); + if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen); } if (lp->lp_ifflags) @@ -914,7 +993,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) if (lp->lp_detaching == 0) { lagg_setflags(lp, 0); lagg_setcaps(lp, lp->lp_ifcapenable); - if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN); + if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen); } /* @@ -938,9 +1017,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) int error = 0; /* Should be checked by the caller */ - if (ifp->if_type != IFT_IEEE8023ADLAG || - (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) + switch (ifp->if_type) { + case IFT_IEEE8023ADLAG: + case IFT_INFINIBANDLAG: + if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) + goto fallback; + break; + default: goto fallback; + } switch (cmd) { case SIOCGLAGGPORT: @@ -1129,6 +1214,44 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) } +static void +lagg_watchdog_infiniband(void *arg) +{ + struct lagg_softc *sc; + struct lagg_port *lp; + struct ifnet *ifp; + struct ifnet *lp_ifp; + + sc = arg; + + /* + * Because infiniband nodes have a fixed MAC address, which is + * generated by the so-called GID, we need to regularly update + * the link level address of the parent lagg device when + * the active port changes. Possibly we could piggy-back on + * link up/down events aswell, but using a timer also provides + * a guarantee against too frequent events. This operation + * does not have to be atomic. + */ + LAGG_RLOCK(); + lp = lagg_link_active(sc, sc->sc_primary); + if (lp != NULL) { + ifp = sc->sc_ifp; + lp_ifp = lp->lp_ifp; + + if (ifp != NULL && lp_ifp != NULL && + memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) { + memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen); + CURVNET_SET(ifp->if_vnet); + EVENTHANDLER_INVOKE(iflladdr_event, ifp); + CURVNET_RESTORE(); + } + } + LAGG_RUNLOCK(); + + callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg); +} + static void lagg_init(void *xsc) { @@ -1151,12 +1274,18 @@ lagg_init(void *xsc) */ CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp), - ETHER_ADDR_LEN) != 0) - if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN); + ifp->if_addrlen) != 0) + if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen); } lagg_proto_init(sc); + if (ifp->if_type == IFT_INFINIBAND) { + mtx_lock(&sc->sc_mtx); + lagg_watchdog_infiniband(sc); + mtx_unlock(&sc->sc_mtx); + } + LAGG_XUNLOCK(sc); } @@ -1173,6 +1302,12 @@ lagg_stop(struct lagg_softc *sc) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; lagg_proto_stop(sc); + + mtx_lock(&sc->sc_mtx); + callout_stop(&sc->sc_watchdog); + mtx_unlock(&sc->sc_mtx); + + callout_drain(&sc->sc_watchdog); } static int @@ -1228,7 +1363,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EPROTONOSUPPORT; break; } - + /* Infiniband only supports the failover protocol. */ + if (ra->ra_proto != LAGG_PROTO_FAILOVER && + ifp->if_type == IFT_INFINIBAND) { + error = EPROTONOSUPPORT; + break; + } LAGG_XLOCK(sc); lagg_proto_detach(sc); LAGG_UNLOCK_ASSERT(); @@ -1546,7 +1686,10 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: - error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); + if (ifp->if_type == IFT_INFINIBAND) + error = EINVAL; + else + error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; case SIOCSIFCAP: @@ -1855,7 +1998,7 @@ lagg_setflags(struct lagg_port *lp, int status) } static int -lagg_transmit(struct ifnet *ifp, struct mbuf *m) +lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error; @@ -1880,6 +2023,32 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m) return (error); } +static int +lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m) +{ + struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; + int error; + +#if defined(KERN_TLS) || defined(RATELIMIT) + if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) + MPASS(m->m_pkthdr.snd_tag->ifp == ifp); +#endif + LAGG_RLOCK(); + /* We need a Tx algorithm and at least one port */ + if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { + LAGG_RUNLOCK(); + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (ENXIO); + } + + INFINIBAND_BPF_MTAP(ifp, m); + + error = lagg_proto_start(sc, m); + LAGG_RUNLOCK(); + return (error); +} + /* * The ifp->if_qflush entry point for lagg(4) is no-op. */ @@ -1889,7 +2058,7 @@ lagg_qflush(struct ifnet *ifp __unused) } static struct mbuf * -lagg_input(struct ifnet *ifp, struct mbuf *m) +lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m) { struct lagg_port *lp = ifp->if_lagg; struct lagg_softc *sc = lp->lp_softc; @@ -1916,6 +2085,34 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) return (m); } +static struct mbuf * +lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m) +{ + struct lagg_port *lp = ifp->if_lagg; + struct lagg_softc *sc = lp->lp_softc; + struct ifnet *scifp = sc->sc_ifp; + + LAGG_RLOCK(); + if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + lp->lp_detaching != 0 || + sc->sc_proto == LAGG_PROTO_NONE) { + LAGG_RUNLOCK(); + m_freem(m); + return (NULL); + } + + INFINIBAND_BPF_MTAP(scifp, m); + + m = lagg_proto_input(sc, lp, m); + if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { + m_freem(m); + m = NULL; + } + + LAGG_RUNLOCK(); + return (m); +} + static int lagg_media_change(struct ifnet *ifp) { diff --git a/sys/net/if_lagg.h b/sys/net/if_lagg.h index 738bfdcecf09..fc3e782d4444 100644 --- a/sys/net/if_lagg.h +++ b/sys/net/if_lagg.h @@ -72,6 +72,33 @@ struct lagg_protos { { "default", LAGG_PROTO_DEFAULT } \ } +/* Supported lagg TYPEs */ +typedef enum { + LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */ + LAGG_TYPE_INFINIBAND, /* infiniband */ + LAGG_TYPE_MAX, +} lagg_type; + +struct lagg_types { + const char *lt_name; + lagg_type lt_value; +}; + +#define LAGG_TYPE_DEFAULT LAGG_TYPE_ETHERNET +#define LAGG_TYPES { \ + { "ethernet", LAGG_TYPE_ETHERNET }, \ + { "infiniband", LAGG_TYPE_INFINIBAND }, \ +} + +/* + * lagg create clone params + */ +struct iflaggparam { + uint8_t lagg_type; /* see LAGG_TYPE_XXX */ + uint8_t reserved_8[3]; + uint32_t reserved_32[3]; +}; + /* * lagg ioctls. */ @@ -206,7 +233,7 @@ struct lagg_counters { struct lagg_softc { struct ifnet *sc_ifp; /* virtual interface */ - struct rmlock sc_mtx; + struct mtx sc_mtx; /* watchdog mutex */ struct sx sc_sx; int sc_proto; /* lagg protocol */ u_int sc_count; /* number of ports */ @@ -230,12 +257,15 @@ struct lagg_softc { u_int sc_opts; int flowid_shift; /* shift the flowid */ struct lagg_counters detached_counters; /* detached ports sum */ + struct callout sc_watchdog; /* watchdog timer */ }; struct lagg_port { struct ifnet *lp_ifp; /* physical interface */ struct lagg_softc *lp_softc; /* parent lagg */ - uint8_t lp_lladdr[ETHER_ADDR_LEN]; +#define LAGG_ADDR_LEN \ + MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN) + uint8_t lp_lladdr[LAGG_ADDR_LEN]; u_char lp_iftype; /* interface type */ uint32_t lp_prio; /* port priority */ @@ -257,7 +287,8 @@ struct lagg_port { struct epoch_context lp_epoch_ctx; }; -extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); +extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); +extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int ); int lagg_enqueue(struct ifnet *, struct mbuf *); diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 61c432ba890f..1103d5f90928 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -242,6 +242,7 @@ typedef enum { IFT_OPTICALCHANNEL = 0xc3, /* Optical Channel */ IFT_OPTICALTRANSPORT = 0xc4, /* Optical Transport */ IFT_INFINIBAND = 0xc7, /* Infiniband */ + IFT_INFINIBANDLAG = 0xc8, /* Infiniband Link Aggregate */ IFT_BRIDGE = 0xd1, /* Transparent bridge interface */ IFT_STF = 0xd7, /* 6to4 interface */