diff --git a/etc/protocols b/etc/protocols index f79362b56461..7c3654f17969 100644 --- a/etc/protocols +++ b/etc/protocols @@ -119,7 +119,7 @@ ipcomp 108 IPComp # IP Payload Compression Protocol snp 109 SNP # Sitara Networks Protocol compaq-peer 110 Compaq-Peer # Compaq Peer Protocol ipx-in-ip 111 IPX-in-IP # IPX in IP -vrrp 112 VRRP # Virtual Router Redundancy Protocol +carp 112 CARP vrrp # Common Address Redundancy Protocol pgm 113 PGM # PGM Reliable Transport Protocol # 114 # any 0-hop protocol l2tp 115 L2TP # Layer Two Tunneling Protocol @@ -142,5 +142,6 @@ pipe 131 PIPE # Private IP Encapsulation within IP sctp 132 SCTP # Stream Control Transmission Protocol fc 133 FC # Fibre Channel # 134-254 # Unassigned +pfsync 240 PFSYNC # PF Synchronization # 255 # Reserved divert 258 DIVERT # Divert pseudo-protocol [non IANA] diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile index 117441bffb74..6b301e9b1e91 100644 --- a/sbin/ifconfig/Makefile +++ b/sbin/ifconfig/Makefile @@ -23,6 +23,9 @@ SRCS+= ifmedia.c # SIOC[GS]IFMEDIA support SRCS+= ifvlan.c # SIOC[GS]ETVLAN support SRCS+= ifieee80211.c # SIOC[GS]IEEE80211 support +SRCS+= ifcarp.c # SIOC[GS]VH support +SRCS+= ifpfsync.c # pfsync(4) support + .if !defined(RELEASE_CRUNCH) SRCS+= af_ipx.c # IPX support DPADD= ${LIBIPX} diff --git a/sbin/ifconfig/ifcarp.c b/sbin/ifconfig/ifcarp.c new file mode 100644 index 000000000000..ab6d152d9bb0 --- /dev/null +++ b/sbin/ifconfig/ifcarp.c @@ -0,0 +1,199 @@ +/* $FreeBSD$ */ +/* from $OpenBSD: ifconfig.c,v 1.82 2003/10/19 05:43:35 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ifconfig.h" + +static const char *carp_states[] = { CARP_STATES }; + +void carp_status(int s, const struct rt_addrinfo *); +void setcarp_advbase(const char *,int, int, const struct afswtch *rafp); +void setcarp_advskew(const char *, int, int, const struct afswtch *rafp); +void setcarp_passwd(const char *, int, int, const struct afswtch *rafp); +void setcarp_vhid(const char *, int, int, const struct afswtch *rafp); + +void +carp_status(int s, const struct rt_addrinfo *info __unused) +{ + const char *state; + struct carpreq carpr; + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + return; + + if (carpr.carpr_vhid > 0) { + if (carpr.carpr_state > CARP_MAXSTATE) + state = ""; + else + state = carp_states[carpr.carpr_state]; 
+ + printf("\tcarp: %s vhid %d advbase %d advskew %d\n", + state, carpr.carpr_vhid, carpr.carpr_advbase, + carpr.carpr_advskew); + } + + return; + +} + +void +setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp) +{ + struct carpreq carpr; + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + /* XXX Should hash the password into the key here, perhaps? */ + strlcpy(carpr.carpr_key, val, CARP_KEY_LEN); + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_vhid(const char *val, int d, int s, const struct afswtch *afp) +{ + int vhid; + struct carpreq carpr; + + vhid = atoi(val); + + if (vhid <= 0) + errx(1, "vhid must be greater than 0"); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_vhid = vhid; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_advskew(const char *val, int d, int s, const struct afswtch *afp) +{ + int advskew; + struct carpreq carpr; + + advskew = atoi(val); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_advskew = advskew; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +void +setcarp_advbase(const char *val, int d, int s, const struct afswtch *afp) +{ + int advbase; + struct carpreq carpr; + + advbase = atoi(val); + + memset((char *)&carpr, 0, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + carpr.carpr_advbase = advbase; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); + + return; +} + +static struct cmd carp_cmds[] = { + 
DEF_CMD_ARG("advbase", setcarp_advbase), + DEF_CMD_ARG("advskew", setcarp_advskew), + DEF_CMD_ARG("pass", setcarp_passwd), + DEF_CMD_ARG("vhid", setcarp_vhid), +}; +static struct afswtch af_carp = { + .af_name = "af_carp", + .af_af = AF_UNSPEC, + .af_status = carp_status, +}; + +static __constructor void +carp_ctor(void) +{ +#define N(a) (sizeof(a) / sizeof(a[0])) + int i; + + for (i = 0; i < N(carp_cmds); i++) + cmd_register(&carp_cmds[i]); + af_register(&af_carp); +#undef N +} diff --git a/sbin/ifconfig/ifpfsync.c b/sbin/ifconfig/ifpfsync.c new file mode 100644 index 000000000000..7ca9883519a8 --- /dev/null +++ b/sbin/ifconfig/ifpfsync.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * Copyright (c) 2004 Max Laier. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ifconfig.h" + +void setpfsync_syncif(const char *, int, int, const struct afswtch *rafp); +void unsetpfsync_syncif(const char *, int, int, const struct afswtch *rafp); +void setpfsync_maxupd(const char *, int, int, const struct afswtch *rafp); +void pfsync_status(int, const struct rt_addrinfo *); + +void +setpfsync_syncif(const char *val, int d, int s, const struct afswtch *rafp) +{ + struct pfsyncreq preq; + + bzero((char *)&preq, sizeof(struct pfsyncreq)); + ifr.ifr_data = (caddr_t)&preq; + + if (ioctl(s, SIOCGETPFSYNC, (caddr_t)&ifr) == -1) + err(1, "SIOCGETPFSYNC"); + + strlcpy(preq.pfsyncr_syncif, val, sizeof(preq.pfsyncr_syncif)); + + if (ioctl(s, SIOCSETPFSYNC, (caddr_t)&ifr) == -1) + err(1, "SIOCSETPFSYNC"); +} + +void +unsetpfsync_syncif(const char *val, int d, int s, const struct afswtch *rafp) +{ + struct pfsyncreq preq; + + bzero((char *)&preq, sizeof(struct pfsyncreq)); + ifr.ifr_data = (caddr_t)&preq; + + if (ioctl(s, SIOCGETPFSYNC, (caddr_t)&ifr) == -1) + err(1, "SIOCGETPFSYNC"); + + bzero((char *)&preq.pfsyncr_syncif, sizeof(preq.pfsyncr_syncif)); + + if (ioctl(s, SIOCSETPFSYNC, (caddr_t)&ifr) == -1) + err(1, "SIOCSETPFSYNC"); +} + +void +setpfsync_maxupd(const char *val, int d, int s, const struct afswtch *rafp) +{ + int maxupdates; + struct pfsyncreq preq; 
+ + maxupdates = atoi(val); + + memset((char *)&preq, 0, sizeof(struct pfsyncreq)); + ifr.ifr_data = (caddr_t)&preq; + + if (ioctl(s, SIOCGETPFSYNC, (caddr_t)&ifr) == -1) + err(1, "SIOCGETPFSYNC"); + + preq.pfsyncr_maxupdates = maxupdates; + + if (ioctl(s, SIOCSETPFSYNC, (caddr_t)&ifr) == -1) + err(1, "SIOCSETPFSYNC"); +} + +void +pfsync_status(int s, const struct rt_addrinfo *info __unused) +{ + struct pfsyncreq preq; + + bzero((char *)&preq, sizeof(struct pfsyncreq)); + ifr.ifr_data = (caddr_t)&preq; + + if (ioctl(s, SIOCGETPFSYNC, (caddr_t)&ifr) == -1) + return; + + if (preq.pfsyncr_syncif[0] != '\0') { + printf("\tpfsync: syncif: %s maxupd: %d\n", + preq.pfsyncr_syncif, preq.pfsyncr_maxupdates); + } +} + +static struct cmd pfsync_cmds[] = { + DEF_CMD_ARG("syncif", setpfsync_syncif), + DEF_CMD_ARG("maxupd", setpfsync_maxupd), + DEF_CMD("-syncif", 1, unsetpfsync_syncif), +}; +static struct afswtch af_pfsync = { + .af_name = "af_pfsync", + .af_af = AF_UNSPEC, + .af_status = pfsync_status, +}; + +static __constructor void +pfsync_ctor(void) +{ +#define N(a) (sizeof(a) / sizeof(a[0])) + int i; + + for (i = 0; i < N(pfsync_cmds); i++) + cmd_register(&pfsync_cmds[i]); + af_register(&af_pfsync); +#undef N +} diff --git a/sys/conf/files b/sys/conf/files index daa8a9f0f6d5..1a9c67453559 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -279,6 +279,7 @@ crypto/rijndael/rijndael-api-fst.c optional geom_bde crypto/rijndael/rijndael-api-fst.c optional random crypto/rijndael/rijndael-api.c optional ipsec crypto/rijndael/rijndael-api.c optional wlan_ccmp +crypto/sha1.c optional carp crypto/sha1.c optional netgraph_mppc_encryption crypto/sha1.c optional crypto crypto/sha1.c optional ipsec @@ -1483,6 +1484,7 @@ netinet/if_atm.c optional atm netinet/if_ether.c optional ether netinet/igmp.c optional inet netinet/in.c optional inet +netinet/ip_carp.c optional carp netinet/in_gif.c optional gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet diff --git 
a/sys/conf/options b/sys/conf/options index f104f9405703..008c932448aa 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -610,6 +610,7 @@ NDEVFSOVERFLOW opt_devfs.h DEV_BPF opt_bpf.h DEV_ISA opt_isa.h DEV_MCA opt_mca.h +DEV_CARP opt_carp.h DEV_SPLASH opt_splash.h EISA_SLOTS opt_eisa.h diff --git a/sys/net/if.c b/sys/net/if.c index b6505dfe4a1a..3c25986f2dc3 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -34,6 +34,7 @@ #include "opt_inet6.h" #include "opt_inet.h" #include "opt_mac.h" +#include "opt_carp.h" #include #include @@ -78,6 +79,9 @@ #ifdef INET #include #endif +#ifdef DEV_CARP +#include +#endif void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); @@ -529,6 +533,12 @@ if_detach(struct ifnet *ifp) int found; EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); +#ifdef DEV_CARP + /* Maybe hook to the generalized departure handler above?!? */ + if (ifp->if_carp) + carp_ifdetach(ifp); +#endif + /* * Remove routes and flush queues. */ @@ -933,6 +943,10 @@ if_unroute(struct ifnet *ifp, int flag, int fam) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); if_qflush(&ifp->if_snd); +#ifdef DEV_CARP + if (ifp->if_carp) + carp_carpdev_state(ifp->if_carp); +#endif rt_ifmsg(ifp); } @@ -951,6 +965,10 @@ if_route(struct ifnet *ifp, int flag, int fam) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); +#ifdef DEV_CARP + if (ifp->if_carp) + carp_carpdev_state(ifp->if_carp); +#endif rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 1b973fec4a8a..2c4ff99f4a65 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -37,6 +37,7 @@ #include "opt_bdg.h" #include "opt_mac.h" #include "opt_netgraph.h" +#include "opt_carp.h" #include #include @@ -73,6 +74,10 @@ #include #endif +#ifdef DEV_CARP +#include +#endif + #ifdef IPX #include #include @@ -315,6 +320,12 @@ 
ether_output(struct ifnet *ifp, struct mbuf *m, } } +#ifdef DEV_CARP + if (ifp->if_carp && + (error = carp_output(ifp, m, dst, NULL))) + goto bad; +#endif + /* Handle ng_ether(4) processing, if any */ if (IFP2AC(ifp)->ac_netgraph != NULL) { if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { @@ -606,6 +617,18 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) if (!(BDG_ACTIVE(ifp)) && !((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) && ifp->if_nvlans > 0)) { +#ifdef DEV_CARP + /* + * XXX: Okay, we need to call carp_forus() and - if it is for us - + * jump over the code that does the normal check + * "ac_enaddr == ether_dhost". The check sequence is a bit + * different from OpenBSD, so we jump over as little code as possible, + * to catch _all_ sanity checks. This needs evaluation, to see if + * the carp ether_dhost values break any of these checks! + */ + if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost)) + goto pre_stats; +#endif /* * Discard packet if upper layers shouldn't see it because it * was unicast to a different Ethernet address.
If the driver @@ -628,6 +651,9 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) } } +#ifdef DEV_CARP +pre_stats: +#endif /* Discard packet if interface is not up */ if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); diff --git a/sys/net/if_media.h b/sys/net/if_media.h index 68a785da9f16..4f7389b3db9b 100644 --- a/sys/net/if_media.h +++ b/sys/net/if_media.h @@ -226,6 +226,11 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, #define IFM_ATM_NOSCRAMB 0x00000200 /* no scrambling */ #define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */ +/* + * CARP Common Address Redundancy Protocol + */ +#define IFM_CARP 0x000000c0 + /* * Shared media sub-types */ @@ -299,6 +304,7 @@ struct ifmedia_description { { IFM_FDDI, "FDDI" }, \ { IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \ { IFM_ATM, "ATM" }, \ + { IFM_CARP, "Common Address Redundancy Protocol" }, \ { 0, NULL }, \ } diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 7af5bec09881..56dca4506331 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -247,4 +247,5 @@ #define IFT_FAITH 0xf2 #define IFT_PFLOG 0xf6 #define IFT_PFSYNC 0xf7 +#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ #endif /* !_NET_IF_TYPES_H_ */ diff --git a/sys/net/if_var.h b/sys/net/if_var.h index d1d9fa2c21e9..78eea2c6bd3f 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -68,6 +68,7 @@ struct rtentry; struct rt_addrinfo; struct socket; struct ether_header; +struct carp_if; #endif #include /* get TAILQ macros */ @@ -146,7 +147,7 @@ struct ifnet { */ struct knlist if_klist; /* events attached to this if */ int if_pcount; /* number of promiscuous listeners */ - void *if_carp; /* carp (tbd) interface pointer */ + struct carp_if *if_carp; /* carp interface structure */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_timer; /* time 'til if_watchdog called */ diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 
2f1c0375258d..1bc1d060afaf 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -39,6 +39,7 @@ #include "opt_inet.h" #include "opt_bdg.h" #include "opt_mac.h" +#include "opt_carp.h" #include #include @@ -67,6 +68,10 @@ #include #include +#ifdef DEV_CARP +#include +#endif + #define SIN(s) ((struct sockaddr_in *)s) #define SDL(s) ((struct sockaddr_dl *)s) @@ -545,6 +550,7 @@ in_arpinput(m) struct sockaddr_dl *sdl; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; + u_int8_t *enaddr = NULL; int op, rif_len; int req_len; @@ -563,10 +569,18 @@ in_arpinput(m) * For a bridge, we want to check the address irrespective * of the receive interface. (This will change slightly * when we have clusters of interfaces). + * If the interface does not match, but the receiving interface + * is part of carp, we call carp_iamatch to see if this is a + * request for the virtual host ip. + * XXX: This is really ugly! */ LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) - if ((do_bridge || (ia->ia_ifp == ifp)) && - itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) + if ((do_bridge || (ia->ia_ifp == ifp) +#ifdef DEV_CARP + || (ifp->if_carp + && carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr)) +#endif + ) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) goto match; LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) if ((do_bridge || (ia->ia_ifp == ifp)) && @@ -587,8 +601,10 @@ in_arpinput(m) if (!do_bridge || (ia = TAILQ_FIRST(&in_ifaddrhead)) == NULL) goto drop; match: + if (!enaddr) + enaddr = (u_int8_t *)IF_LLADDR(ifp); myaddr = ia->ia_addr.sin_addr; - if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen)) + if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) goto drop; /* it's from me, ignore it.
*/ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { log(LOG_ERR, @@ -711,7 +727,7 @@ reply: if (itaddr.s_addr == myaddr.s_addr) { /* I am the target */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); + (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { la = arplookup(itaddr.s_addr, 0, SIN_PROXY); if (la == NULL) { @@ -738,7 +754,7 @@ reply: goto drop; } (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln); + (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); rtfree(rt); /* @@ -880,6 +896,19 @@ arp_ifinit(ifp, ifa) ifa->ifa_flags |= RTF_CLONING; } +void +arp_ifinit2(ifp, ifa, enaddr) + struct ifnet *ifp; + struct ifaddr *ifa; + u_char *enaddr; +{ + if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) + arprequest(ifp, &IA_SIN(ifa)->sin_addr, + &IA_SIN(ifa)->sin_addr, enaddr); + ifa->ifa_rtrequest = arp_rtrequest; + ifa->ifa_flags |= RTF_CLONING; +} + static void arp_init(void) { diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h index 9b60492de78e..14df15fcc17c 100644 --- a/sys/netinet/if_ether.h +++ b/sys/netinet/if_ether.h @@ -112,6 +112,7 @@ extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN]; int arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, struct sockaddr *dst, u_char *desten); void arp_ifinit(struct ifnet *, struct ifaddr *); +void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *); #endif #endif diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 20995bdef9af..45c269ca89ae 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -230,6 +230,7 @@ __END_DECLS #define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */ /* 101-254: Partly Unassigned */ #define IPPROTO_PIM 103 /* Protocol Independent Mcast */ +#define IPPROTO_CARP 112 /* CARP */ #define IPPROTO_PGM 113 /* PGM */ #define IPPROTO_PFSYNC 240 /* PFSYNC */ /* 255: Reserved */ @@ -357,6 +358,7 @@ __END_DECLS #define INADDR_UNSPEC_GROUP 
(u_int32_t)0xe0000000 /* 224.0.0.0 */ #define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */ #define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */ +#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */ #define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */ #define INADDR_ALLMDNS_GROUP (u_int32_t)0xe00000fb /* 224.0.0.251 */ #define INADDR_MAX_LOCAL_GROUP (u_int32_t)0xe00000ff /* 224.0.0.255 */ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 9ae32c4db3ae..103682fd1f1b 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -35,6 +35,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_pf.h" +#include "opt_carp.h" #include #include @@ -90,6 +91,10 @@ #include #endif +#ifdef DEV_CARP +#include +#endif + extern struct domain inetdomain; /* Spacer for loadable protocols. */ @@ -237,6 +242,14 @@ struct protosw inetsw[] = { &rip_usrreqs }, #endif /* DEV_PFSYNC */ +#ifdef DEV_CARP +{ SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, + carp_input, (pr_output_t*)rip_output, 0, rip_ctloutput, + 0, + 0, 0, 0, 0, + &rip_usrreqs +}, +#endif /* DEV_CARP */ /* Spacer n-times for loadable protocols. */ IPPROTOSPACER, IPPROTOSPACER, @@ -290,3 +303,6 @@ SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW"); #ifdef PIM SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); #endif +#ifdef DEV_CARP +SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); +#endif diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c new file mode 100644 index 000000000000..ec54ef706223 --- /dev/null +++ b/sys/netinet/ip_carp.c @@ -0,0 +1,2032 @@ +/* $FreeBSD$ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "opt_carp.h" +#include "opt_bpf.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef INET +#include +#include +#include +#include +#include +#include +#include +#endif + +#ifdef INET6 +#include +#include +#include +#include +#include +#endif + +#include +#include + +#define CARP_IFNAME "carp" +static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); +SYSCTL_DECL(_net_inet_carp); + +struct carp_softc { + struct arpcom sc_ac; /* Interface clue */ + int if_flags; /* UP/DOWN */ + struct ifnet *sc_ifp; /* Parent */ + struct in_ifaddr *sc_ia; /* primary iface address */ + struct ip_moptions sc_imo; +#ifdef INET6 + struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ + struct ip6_moptions sc_im6o; +#endif /* INET6 */ + TAILQ_ENTRY(carp_softc) sc_list; + + enum { INIT = 0, BACKUP, MASTER } sc_state; + + int sc_flags_backup; + int sc_suppress; + + int sc_sendad_errors; +#define CARP_SENDAD_MAX_ERRORS 3 + int sc_sendad_success; +#define CARP_SENDAD_MIN_SUCCESS 3 + + int sc_vhid; + int sc_advskew; + int sc_naddrs; + int sc_naddrs6; + int sc_advbase; /* seconds */ + int sc_init_counter; + u_int64_t sc_counter; + + /* authentication */ +#define CARP_HMAC_PAD 64 + unsigned char sc_key[CARP_KEY_LEN]; + unsigned char sc_pad[CARP_HMAC_PAD]; + SHA1_CTX sc_sha1; + + struct callout sc_ad_tmo; /* advertisement timeout */ + struct callout sc_md_tmo; /* master down timeout */ + struct callout sc_md6_tmo; /* master down timeout */ + + LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ +}; +#define sc_if sc_ac.ac_if + +int carp_suppress_preempt = 0; +int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ +SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, + 
&carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); +SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, + &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); +SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, + &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); +SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, + &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); + +struct carpstats carpstats; +SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, + &carpstats, carpstats, + "CARP statistics (struct carpstats, netinet/ip_carp.h)"); + +struct carp_if { + TAILQ_HEAD(, carp_softc) vhif_vrs; + int vhif_nvrs; + + struct ifnet *vhif_ifp; + struct mtx vhif_mtx; +}; +/* lock per carp_if queue */ +#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp", \ + NULL, MTX_DEF) +#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif->vhif_mtx)) +#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) +#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) +#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) + +#define CARP_LOG(sc, s) \ + if (carp_opts[CARPCTL_LOG]) { \ + if (sc != NULL) \ + log(LOG_INFO, "%s: ", (sc)->sc_if.if_xname); \ + else \ + log(LOG_INFO, "carp: "); \ + printf s; \ +/* addlog s; addlog("\n"); */ \ +} + +void carp_hmac_prepare(struct carp_softc *); +void carp_hmac_generate(struct carp_softc *, u_int32_t *, + unsigned char *); +int carp_hmac_verify(struct carp_softc *, u_int32_t *, + unsigned char *); +void carp_setroute(struct carp_softc *, int); +void carp_input_c(struct mbuf *, struct carp_softc *, + struct carp_header *, sa_family_t); +int carp_clone_create(struct if_clone *, int); +void carp_clone_destroy(struct ifnet *); +void carpdetach(struct carp_softc *); +int carp_prepare_ad(struct mbuf *, struct carp_softc *, + struct carp_header *); +void carp_send_ad_all(void); +void carp_send_ad(void *); +void carp_send_arp(struct carp_softc *); +void 
carp_master_down(void *); +int carp_ioctl(struct ifnet *, u_long, caddr_t); +static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +void carp_start(struct ifnet *); +void carp_setrun(struct carp_softc *, sa_family_t); +void carp_set_state(struct carp_softc *, int); +int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); +enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; + +int carp_set_addr(struct carp_softc *, struct sockaddr_in *); +int carp_del_addr(struct carp_softc *, struct sockaddr_in *); +#ifdef INET6 +void carp_send_na(struct carp_softc *); +int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); +int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); +#endif + +static LIST_HEAD(, carp_softc) carpif_list; +IFC_SIMPLE_DECLARE(carp, 0); + +static __inline u_int16_t +carp_cksum(struct mbuf *m, int len) +{ + return (in_cksum(m, len)); +} + +void +carp_hmac_prepare(struct carp_softc *sc) +{ + u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + u_int8_t vhid = sc->sc_vhid & 0xff; + struct ifaddr *ifa; + int i; +#ifdef INET6 + struct in6_addr in6; +#endif + + /* compute ipad from key */ + bzero(sc->sc_pad, sizeof(sc->sc_pad)); + bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); + for (i = 0; i < sizeof(sc->sc_pad); i++) + sc->sc_pad[i] ^= 0x36; + + /* precompute first part of inner hash */ + SHA1Init(&sc->sc_sha1); + SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); + SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); + SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); + SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); +#ifdef INET + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET) + SHA1Update(&sc->sc_sha1, + (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, + sizeof(struct in_addr)); + } +#endif /* INET */ +#ifdef INET6 + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family == 
AF_INET6) { + in6 = ifatoia6(ifa)->ia_addr.sin6_addr; + if (IN6_IS_ADDR_LINKLOCAL(&in6)) + in6.s6_addr16[1] = 0; + SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); + } + } +#endif /* INET6 */ + + /* convert ipad to opad */ + for (i = 0; i < sizeof(sc->sc_pad); i++) + sc->sc_pad[i] ^= 0x36 ^ 0x5c; +} + +void +carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], + unsigned char md[20]) +{ + SHA1_CTX sha1ctx; + + /* fetch first half of inner hash */ + bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); + + SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); + SHA1Final(md, &sha1ctx); + + /* outer hash */ + SHA1Init(&sha1ctx); + SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); + SHA1Update(&sha1ctx, md, 20); + SHA1Final(md, &sha1ctx); +} + +int +carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], + unsigned char md[20]) +{ + unsigned char md2[20]; + + carp_hmac_generate(sc, counter, md2); + + return (bcmp(md, md2, sizeof(md2))); +} + +void +carp_setroute(struct carp_softc *sc, int cmd) +{ + struct ifaddr *ifa; + int s; + + s = splnet(); + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET && sc->sc_ifp != NULL) { + int count = carp_addrcount( + (struct carp_if *)sc->sc_ifp->if_carp, + ifatoia(ifa), CARP_COUNT_MASTER); + + if ((cmd == RTM_ADD && count == 1) || + (cmd == RTM_DELETE && count == 0)) + rtinit(ifa, cmd, RTF_UP | RTF_HOST); + } +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) { + if (cmd == RTM_ADD) + in6_ifaddloop(ifa); + else + in6_ifremloop(ifa); + } +#endif /* INET6 */ + } + splx(s); +} + +int +carp_clone_create(struct if_clone *ifc, int unit) +{ + + struct carp_softc *sc; + struct ifnet *ifp; + + MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + + sc->sc_flags_backup = 0; + sc->sc_suppress = 0; + sc->sc_advbase = CARP_DFLTINTV; + sc->sc_vhid = -1; /* required setting */ + sc->sc_advskew = 0; + sc->sc_init_counter = 1; + sc->sc_naddrs = 
sc->sc_naddrs6 = 0; /* M_ZERO? */ +#ifdef INET6 + sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; +#endif + + callout_init(&sc->sc_ad_tmo, 0); + callout_init(&sc->sc_md_tmo, 0); + callout_init(&sc->sc_md6_tmo, 0); + + ifp = &sc->sc_if; + ifp->if_softc = sc; + if_initname(ifp, CARP_IFNAME, unit); + ifp->if_mtu = ETHERMTU; + ifp->if_flags = 0; + ifp->if_ioctl = carp_ioctl; + ifp->if_output = carp_looutput; + ifp->if_start = carp_start; + ifp->if_type = IFT_CARP; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = 0; + if_attach(ifp); + LIST_INSERT_HEAD(&carpif_list, sc, sc_next); + bpfattach(&sc->sc_if, DLT_LOOP, sizeof(u_int32_t)); + return (0); +} + +void +carp_clone_destroy(struct ifnet *ifp) +{ + struct carp_softc *sc = ifp->if_softc; + struct carp_if *cif; + struct ip_moptions *imo = &sc->sc_imo; +#ifdef INET6 + struct ip6_moptions *im6o = &sc->sc_im6o; +#endif + +/* carpdetach(sc); */ + + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + + if (imo->imo_num_memberships) { + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + } +#ifdef INET6 + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + struct in6_multi_mship *imm = + LIST_FIRST(&im6o->im6o_memberships); + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + im6o->im6o_multicast_ifp = NULL; +#endif + + /* Remove ourself from parents if_carp queue */ + if (sc->sc_ifp && (cif = sc->sc_ifp->if_carp)) { + CARP_LOCK(cif); + TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); + if (!--cif->vhif_nvrs) { + sc->sc_ifp->if_carp = NULL; + CARP_LOCK_DESTROY(cif); + FREE(cif, M_CARP); + } else { + CARP_UNLOCK(cif); + } + } + + bpfdetach(ifp); + if_detach(ifp); + LIST_REMOVE(sc, sc_next); + free(sc, M_CARP); +} + +/* + * process input packet. + * we have rearranged checks order compared to the rfc, + * but it seems more efficient this way or not possible otherwise. 
+ */
+void
+carp_input(struct mbuf *m, int hlen)
+{
+	/*
+	 * IPv4 entry point.  The checks below (CARP allowed, receiving
+	 * interface has CARP state, TTL, minimum length) each free m
+	 * before returning on rejection.
+	 */
+	struct carp_softc *sc = NULL;
+	struct ip *ip = mtod(m, struct ip *);
+	struct carp_header *ch;
+	int iplen, len;
+
+	carpstats.carps_ipackets++;
+
+	if (!carp_opts[CARPCTL_ALLOW]) {
+		m_freem(m);
+		return;
+	}
+
+	/* check if received on a valid carp interface */
+	if (m->m_pkthdr.rcvif->if_carp == NULL) {
+		carpstats.carps_badif++;
+		CARP_LOG(sc, ("packet received on non-carp interface: %s",
+		    m->m_pkthdr.rcvif->if_xname));
+		m_freem(m);
+		return;
+	}
+
+	/* verify that the IP TTL is 255. */
+	if (ip->ip_ttl != CARP_DFLTTL) {
+		carpstats.carps_badttl++;
+		CARP_LOG(sc, ("received ttl %d != 255 on %s", ip->ip_ttl,
+		    m->m_pkthdr.rcvif->if_xname));
+		m_freem(m);
+		return;
+	}
+
+	iplen = ip->ip_hl << 2;
+
+	if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
+		carpstats.carps_badlen++;
+		/* the subtraction yields a size_t; cast to match %d */
+		CARP_LOG(sc, ("received len %d < sizeof(struct carp_header)",
+		    (int)(m->m_len - sizeof(struct ip))));
+		m_freem(m);
+		return;
+	}
+
+	/*
+	 * Pull up only when the first mbuf holds fewer bytes than the
+	 * IP header plus the CARP header.
+	 */
+	if (m->m_len < iplen + sizeof(*ch)) {
+		if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
+			carpstats.carps_hdrops++;
+			/* CARP_LOG ? 
*/ + return; + } + ip = mtod(m, struct ip *); + } + ch = (struct carp_header *)((char *)ip + iplen); + + /* + * verify that the received packet length is + * equal to the CARP header + */ + len = iplen + sizeof(*ch); + if (len > m->m_pkthdr.len) { + carpstats.carps_badlen++; + CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len, + m->m_pkthdr.rcvif->if_xname)); + m_freem(m); + return; + } + + if ((m = m_pullup(m, len)) == NULL) { + carpstats.carps_hdrops++; + return; + } + ip = mtod(m, struct ip *); + ch = (struct carp_header *)((char *)ip + iplen); + + /* verify the CARP checksum */ + m->m_data += iplen; + if (carp_cksum(m, len - iplen)) { + carpstats.carps_badsum++; + CARP_LOG(sc, ("checksum failed on %s", + m->m_pkthdr.rcvif->if_xname)); + m_freem(m); + return; + } + m->m_data -= iplen; + + carp_input_c(m, sc, ch, AF_INET); +} + +#ifdef INET6 +int +carp6_input(struct mbuf **mp, int *offp, int proto) +{ + struct mbuf *m = *mp; + struct carp_softc *sc = NULL; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct carp_header *ch; + u_int len; + + carpstats.carps_ipackets6++; + + if (!carp_opts[CARPCTL_ALLOW]) { + m_freem(m); + return (IPPROTO_DONE); + } + + /* check if received on a valid carp interface */ + if (m->m_pkthdr.rcvif->if_carp == NULL) { + carpstats.carps_badif++; + CARP_LOG(sc, ("packet received on non-carp interface: %s", + m->m_pkthdr.rcvif->if_xname)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* verify that the IP TTL is 255 */ + if (ip6->ip6_hlim != CARP_DFLTTL) { + carpstats.carps_badttl++; + CARP_LOG(sc, ("received ttl %d != 255 on %s", ip6->ip6_hlim, + m->m_pkthdr.rcvif->if_xname)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* verify that we have a complete carp packet */ + len = m->m_len; + IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); + if (ch == NULL) { + carpstats.carps_badlen++; + CARP_LOG(sc, ("packet size %u too small on %s", len, + m->m_pkthdr.rcvif->if_xname)); + return (IPPROTO_DONE); + } + + 
+	/* verify the CARP checksum */
+	m->m_data += *offp;
+	if (carp_cksum(m, sizeof(*ch))) {
+		carpstats.carps_badsum++;
+		CARP_LOG(sc, ("checksum failed, on %s",
+		    m->m_pkthdr.rcvif->if_xname));
+		m_freem(m);
+		return (IPPROTO_DONE);
+	}
+	m->m_data -= *offp;
+
+	carp_input_c(m, sc, ch, AF_INET6);
+	return (IPPROTO_DONE);
+}
+#endif /* INET6 */
+
+/*
+ * Address-family independent part of advertisement processing.
+ * The sc argument is overwritten: the softc is looked up by matching
+ * ch->carp_vhid against the virtual hosts attached to the receiving
+ * interface.  af is AF_INET or AF_INET6, as passed by the callers.
+ */
+void
+carp_input_c(struct mbuf *m, struct carp_softc *sc,
+    struct carp_header *ch, sa_family_t af)
+{
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	u_int64_t tmp_counter;
+	struct timeval sc_tv, ch_tv;
+
+	/* verify that the VHID is valid on the receiving interface */
+	CARP_LOCK(ifp->if_carp);
+	TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
+		if (sc->sc_vhid == ch->carp_vhid)
+			break;
+	CARP_UNLOCK(ifp->if_carp);
+	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+	    (IFF_UP|IFF_RUNNING)) {
+		carpstats.carps_badvhid++;
+		m_freem(m);
+		return;
+	}
+
+	getmicrotime(&sc->sc_if.if_lastchange);
+	sc->sc_if.if_ipackets++;
+	sc->sc_if.if_ibytes += m->m_pkthdr.len;
+
+	if (sc->sc_if.if_bpf) {
+		/*
+		 * We need to prepend the address family as
+		 * a four byte field.  Cons up a dummy header
+		 * to pacify bpf.  This is safe because bpf
+		 * will only read from the mbuf (i.e., it won't
+		 * try to free it or keep a pointer to it).
+		 */
+		struct mbuf m0;
+		struct ip *ip = mtod(m, struct ip *);
+		u_int32_t maf = htonl(af);
+
+		m0.m_next = m;
+		m0.m_len = sizeof(maf);
+		m0.m_data = (char *)&maf;
+		/*
+		 * BPF wants net byte order.  ip_len and ip_off are 16-bit
+		 * fields, so they need htons(), not htonl().  Only an IPv4
+		 * packet starts with a struct ip; leave IPv6 headers alone.
+		 */
+		if (af == AF_INET) {
+			ip->ip_len = htons(ip->ip_len);
+			ip->ip_off = htons(ip->ip_off);
+		}
+		BPF_MTAP(&sc->sc_if, &m0);
+	}
+
+	/* verify the CARP version. 
*/ + if (ch->carp_version != CARP_VERSION) { + carpstats.carps_badver++; + sc->sc_if.if_ierrors++; + CARP_LOG(sc, ("invalid version %d", ch->carp_version)); + m_freem(m); + return; + } + + /* verify the hash */ + if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { + carpstats.carps_badauth++; + sc->sc_if.if_ierrors++; + CARP_LOG(sc, ("incorrect hash")); + m_freem(m); + return; + } + + tmp_counter = ntohl(ch->carp_counter[0]); + tmp_counter = tmp_counter<<32; + tmp_counter += ntohl(ch->carp_counter[1]); + + /* XXX Replay protection goes here */ + + sc->sc_init_counter = 0; + sc->sc_counter = tmp_counter; + + sc_tv.tv_sec = sc->sc_advbase; + if (carp_suppress_preempt && sc->sc_advskew < 240) + sc_tv.tv_usec = 240 * 1000000 / 256; + else + sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; + ch_tv.tv_sec = ch->carp_advbase; + ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. + */ + if (timevalcmp(&sc_tv, &ch_tv, >) || + timevalcmp(&sc_tv, &ch_tv, ==)) { + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP); + carp_setrun(sc, 0); + carp_setroute(sc, RTM_DELETE); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (carp_opts[CARPCTL_PREEMPT] && + timevalcmp(&sc_tv, &ch_tv, <)) { + carp_master_down(sc); + break; + } + + /* + * If the master is going to advertise at such a low frequency + * that he's guaranteed to time out, we'd might as well just + * treat him as timed out now. + */ + sc_tv.tv_sec = sc->sc_advbase * 3; + if (timevalcmp(&sc_tv, &ch_tv, <)) { + carp_master_down(sc); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. 
+		 */
+		carp_setrun(sc, af);
+		break;
+	}
+
+	m_freem(m);
+	return;
+}
+
+/*
+ * Stop the three callouts of sc and remove every AF_INET address from
+ * the carp interface's address list.  Addresses of other families are
+ * left in place.
+ */
+void
+carpdetach(struct carp_softc *sc)
+{
+	struct ifaddr *ifa, *next;
+
+	callout_stop(&sc->sc_ad_tmo);
+	callout_stop(&sc->sc_md_tmo);
+	callout_stop(&sc->sc_md6_tmo);
+
+	/*
+	 * Walk the list with a saved successor: entries are unlinked
+	 * while iterating, and re-reading TAILQ_FIRST() would spin
+	 * forever on a head entry that is not AF_INET.
+	 */
+	for (ifa = TAILQ_FIRST(&sc->sc_if.if_addrlist); ifa != NULL;
+	    ifa = next) {
+		next = TAILQ_NEXT(ifa, ifa_link);
+		if (ifa->ifa_addr->sa_family == AF_INET) {
+			struct in_ifaddr *ia = ifatoia(ifa);
+
+			carp_del_addr(sc, &ia->ia_addr);
+
+			/* ripped screaming from in_control(SIOCDIFADDR) */
+			in_ifscrub(&sc->sc_if, ia);
+			TAILQ_REMOVE(&sc->sc_if.if_addrlist, ifa, ifa_link);
+			TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
+			IFAFREE((&ia->ia_ifa));
+		}
+	}
+}
+
+/*
+ * Detach an interface from the carp.
+ *
+ * NOTE(review): carpdetach() calls carp_del_addr(), which takes
+ * CARP_LOCK(cif) itself and may unlink sc from vhif_vrs (and free cif)
+ * while this loop holds the lock and iterates that list -- confirm that
+ * the lock may be re-entered and that the iteration is safe.
+ */
+void
+carp_ifdetach(struct ifnet *ifp)
+{
+	struct carp_softc *sc;
+	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
+		carpdetach(sc);
+	CARP_UNLOCK(cif);
+}
+
+/*
+ * Fill in the counter and HMAC of the advertisement header ch and tag
+ * the mbuf with a pointer to the carp interface.  Returns 0 on success;
+ * if the tag cannot be allocated, m is freed and ENOMEM is returned.
+ */
+int
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+	struct m_tag *mtag;
+	struct ifnet *ifp = &sc->sc_if;
+
+	if (sc->sc_init_counter) {
+		/* this could also be seconds since unix epoch */
+		sc->sc_counter = arc4random();
+		sc->sc_counter = sc->sc_counter << 32;
+		sc->sc_counter += arc4random();
+	} else
+		sc->sc_counter++;
+
+	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
+	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
+
+	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
+
+	/* Tag packet for carp_output */
+	mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL) {
+		m_freem(m);
+		sc->sc_if.if_oerrors++;
+		return (ENOMEM);
+	}
+	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+	m_tag_prepend(m, mtag);
+
+	return (0);
+}
+
+void
+carp_send_ad_all(void)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct carp_softc *vh;
+
+	TAILQ_FOREACH(ifp, &ifnet, if_list) {
+		if (ifp->if_carp == NULL)
+			continue;
+
+		cif = (struct carp_if *)ifp->if_carp;
+		
CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) && + vh->sc_state == MASTER) + carp_send_ad(vh); + } + CARP_UNLOCK(cif); + } +} + +void +carp_send_ad(void *v) +{ + struct carp_header ch; + struct timeval tv; + struct carp_softc *sc = v; + struct carp_header *ch_ptr; + struct mbuf *m; + int len, advbase, advskew; + + /* bow out if we've lost our UPness or RUNNINGuiness */ + if ((sc->sc_if.if_flags & + (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { + advbase = 255; + advskew = 255; + } else { + advbase = sc->sc_advbase; + if (!carp_suppress_preempt || sc->sc_advskew > 240) + advskew = sc->sc_advskew; + else + advskew = 240; + tv.tv_sec = advbase; + tv.tv_usec = advskew * 1000000 / 256; + } + + ch.carp_version = CARP_VERSION; + ch.carp_type = CARP_ADVERTISEMENT; + ch.carp_vhid = sc->sc_vhid; + ch.carp_advbase = advbase; + ch.carp_advskew = advskew; + ch.carp_authlen = 7; /* XXX DEFINE */ + ch.carp_pad1 = 0; /* must be zero */ + ch.carp_cksum = 0; + +#ifdef INET + if (sc->sc_ia) { + struct ip *ip; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + sc->sc_ac.ac_if.if_oerrors++; + carpstats.carps_onomem++; + /* XXX maybe less ? 
*/ + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + return; + } + len = sizeof(*ip) + sizeof(ch); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + MH_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = len; + ip->ip_id = ip_newid(); + ip->ip_off = IP_DF; + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + ch_ptr = (struct carp_header *)(&ip[1]); + bcopy(&ch, ch_ptr, sizeof(ch)); + if (carp_prepare_ad(m, sc, ch_ptr)) + return; + + m->m_data += sizeof(*ip); + ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); + m->m_data -= sizeof(*ip); + + getmicrotime(&sc->sc_if.if_lastchange); + sc->sc_ac.ac_if.if_opackets++; + sc->sc_ac.ac_if.if_obytes += len; + carpstats.carps_opackets++; + + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { + sc->sc_if.if_oerrors++; + if (sc->sc_sendad_errors < INT_MAX) + sc->sc_sendad_errors++; + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) + carp_send_ad_all(); + } + sc->sc_sendad_success = 0; + } else { + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { + if (++sc->sc_sendad_success >= + CARP_SENDAD_MIN_SUCCESS) { + carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + } + } else + sc->sc_sendad_errors = 0; + } + } +#endif /* INET */ +#ifdef INET6 + if (sc->sc_ia6) { + struct ip6_hdr *ip6; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + sc->sc_ac.ac_if.if_oerrors++; + carpstats.carps_onomem++; + /* XXX maybe less ? 
*/ + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + return; + } + len = sizeof(*ip6) + sizeof(ch); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + MH_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, + sizeof(struct in6_addr)); + /* set the multicast destination */ + + ip6->ip6_dst.s6_addr8[0] = 0xff; + ip6->ip6_dst.s6_addr8[1] = 0x02; + ip6->ip6_dst.s6_addr8[15] = 0x12; + + ch_ptr = (struct carp_header *)(&ip6[1]); + bcopy(&ch, ch_ptr, sizeof(ch)); + if (carp_prepare_ad(m, sc, ch_ptr)) + return; + + m->m_data += sizeof(*ip6); + ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); + m->m_data -= sizeof(*ip6); + + getmicrotime(&sc->sc_if.if_lastchange); + sc->sc_if.if_opackets++; + sc->sc_if.if_obytes += len; + carpstats.carps_opackets6++; + + if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { + sc->sc_if.if_oerrors++; + if (sc->sc_sendad_errors < INT_MAX) + sc->sc_sendad_errors++; + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) + carp_send_ad_all(); + } + sc->sc_sendad_success = 0; + } else { + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { + if (++sc->sc_sendad_success >= + CARP_SENDAD_MIN_SUCCESS) { + carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + } + } else + sc->sc_sendad_errors = 0; + } + } +#endif /* INET6 */ + + if (advbase != 255 || advskew != 255) + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + +} + +/* + * Broadcast a gratuitous ARP request containing + * the virtual router MAC address for each IP address + * associated with the virtual router. 
+ */ +void +carp_send_arp(struct carp_softc *sc) +{ + struct ifaddr *ifa; + + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + +/* arprequest(sc->sc_ifp, &in, &in, sc->sc_ac.ac_enaddr); */ + arp_ifinit2(sc->sc_ifp, ifa, sc->sc_ac.ac_enaddr); + + DELAY(1000); /* XXX */ + } +} + +#ifdef INET6 +void +carp_send_na(struct carp_softc *sc) +{ + struct ifaddr *ifa; + struct in6_addr *in6; + static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; + + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; + nd6_na_output(sc->sc_ifp, &mcast, in6, + ND_NA_FLAG_OVERRIDE, 1, NULL); + DELAY(1000); /* XXX */ + } +} +#endif /* INET6 */ + +int +carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) +{ + struct carp_softc *vh; + struct ifaddr *ifa; + int count = 0; + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((type == CARP_COUNT_RUNNING && + (vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING)) || + (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { + TAILQ_FOREACH(ifa, &vh->sc_ac.ac_if.if_addrlist, + ifa_list) { + if (ifa->ifa_addr->sa_family == AF_INET && + ia->ia_addr.sin_addr.s_addr == + ifatoia(ifa)->ia_addr.sin_addr.s_addr) + count++; + } + } + } + return (count); +} + +int +carp_iamatch(void *v, struct in_ifaddr *ia, + struct in_addr *isaddr, u_int8_t **enaddr) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + int index, count = 0; + struct ifaddr *ifa; + + CARP_LOCK(cif); + + if (carp_opts[CARPCTL_ARPBALANCE]) { + /* + * XXX proof of concept implementation. + * We use the source ip to decide which virtual host should + * handle the request. If we're master of that virtual host, + * then we respond, otherwise, just drop the arp packet on + * the floor. 
+ */ + count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); + if (count == 0) { + /* should never reach this */ + CARP_UNLOCK(cif); + return (0); + } + + /* this should be a hash, like pf_hash() */ + index = isaddr->s_addr % count; + count = 0; + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING)) { + TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, + ifa_list) { + if (ifa->ifa_addr->sa_family == + AF_INET && + ia->ia_addr.sin_addr.s_addr == + ifatoia(ifa)->ia_addr.sin_addr.s_addr) { + if (count == index) { + if (vh->sc_state == + MASTER) { + *enaddr = vh->sc_ac.ac_enaddr; + CARP_UNLOCK(cif); + return (1); + } else { + CARP_UNLOCK(cif); + return (0); + } + } + count++; + } + } + } + } + } else { + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING) && ia->ia_ifp == + &vh->sc_if) { + *enaddr = vh->sc_ac.ac_enaddr; + CARP_UNLOCK(cif); + return (1); + } + } + } + CARP_UNLOCK(cif); + return (0); +} + +#ifdef INET6 +struct ifaddr * +carp_iamatch6(void *v, struct in6_addr *taddr) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + struct ifaddr *ifa; + + CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { + if (IN6_ARE_ADDR_EQUAL(taddr, + &ifatoia6(ifa)->ia_addr.sin6_addr) && + ((vh->sc_if.if_flags & + (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { + CARP_UNLOCK(cif); + return (ifa); + } + } + } + CARP_UNLOCK(cif); + + return (NULL); +} + +void * +carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) +{ + struct m_tag *mtag; + struct carp_if *cif = v; + struct carp_softc *sc; + struct ifaddr *ifa; + + CARP_LOCK(cif); + TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { + TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { + if (IN6_ARE_ADDR_EQUAL(taddr, + &ifatoia6(ifa)->ia_addr.sin6_addr) && + ((sc->sc_if.if_flags & + (IFF_UP|IFF_RUNNING)) == 
(IFF_UP|IFF_RUNNING))) { + struct ifnet *ifp = &sc->sc_if; + mtag = m_tag_get(PACKET_TAG_CARP, + sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) { + /* better a bit than nothing */ + CARP_UNLOCK(cif); + return (sc->sc_ac.ac_enaddr); + } + bcopy(&ifp, (caddr_t)(mtag + 1), + sizeof(struct ifnet *)); + m_tag_prepend(m, mtag); + + CARP_UNLOCK(cif); + return (sc->sc_ac.ac_enaddr); + } + } + } + CARP_UNLOCK(cif); + + return (NULL); +} +#endif + +struct ifnet * +carp_forus(void *v, void *dhost) +{ + struct carp_if *cif = v; + struct carp_softc *vh; + u_int8_t *ena = dhost; + + if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) + return (NULL); + + CARP_LOCK(cif); + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) + if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == + (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER && + !bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) { + CARP_UNLOCK(cif); + return (&vh->sc_if); + } + + CARP_UNLOCK(cif); + return (NULL); +} + +void +carp_master_down(void *v) +{ + struct carp_softc *sc = v; + + switch (sc->sc_state) { + case INIT: + printf("%s: master_down event in INIT state\n", + sc->sc_if.if_xname); + break; + case MASTER: + break; + case BACKUP: + carp_set_state(sc, MASTER); + carp_send_ad(sc); + carp_send_arp(sc); +#ifdef INET6 + carp_send_na(sc); +#endif /* INET6 */ + carp_setrun(sc, 0); + carp_setroute(sc, RTM_ADD); + break; + } +} + +/* + * When in backup state, af indicates whether to reset the master down timer + * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
+ */ +void +carp_setrun(struct carp_softc *sc, sa_family_t af) +{ + struct timeval tv; + + if (sc->sc_if.if_flags & IFF_UP && + sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) + sc->sc_if.if_flags |= IFF_RUNNING; + else { + sc->sc_if.if_flags &= ~IFF_RUNNING; + carp_setroute(sc, RTM_DELETE); + return; + } + + switch (sc->sc_state) { + case INIT: + if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { + carp_send_ad(sc); + carp_send_arp(sc); +#ifdef INET6 + carp_send_na(sc); +#endif /* INET6 */ + carp_set_state(sc, MASTER); + carp_setroute(sc, RTM_ADD); + } else { + carp_set_state(sc, BACKUP); + carp_setroute(sc, RTM_DELETE); + carp_setrun(sc, 0); + } + break; + case BACKUP: + callout_stop(&sc->sc_ad_tmo); + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + switch (af) { +#ifdef INET + case AF_INET: + callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + carp_master_down, sc); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + carp_master_down, sc); + break; +#endif /* INET6 */ + default: + if (sc->sc_naddrs) + callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + carp_master_down, sc); + if (sc->sc_naddrs6) + callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + carp_master_down, sc); + break; + } + break; + case MASTER: + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_send_ad, sc); + break; + } +} + +int +carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) +{ + struct ifnet *ifp; + struct carp_if *cif; + struct in_ifaddr *ia, *ia_if; + struct ip_moptions *imo = &sc->sc_imo; + struct in_addr addr; + u_long iaddr = htonl(sin->sin_addr.s_addr); + int own, error; + + if (sin->sin_addr.s_addr == 0) { + if (!(sc->sc_if.if_flags & IFF_UP)) + carp_set_state(sc, INIT); + if (sc->sc_naddrs) + sc->sc_if.if_flags |= IFF_UP; + carp_setrun(sc, 0); + return (0); + } + + /* we have to do it by hands to check we won't 
match on us */ + ia_if = NULL; own = 0; + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + /* and, yeah, we need a multicast-capable iface too */ + if (ia->ia_ifp != &sc->sc_if && + (ia->ia_ifp->if_flags & IFF_MULTICAST) && + (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { + if (!ia_if) + ia_if = ia; + if (sin->sin_addr.s_addr == + ia->ia_addr.sin_addr.s_addr) + own++; + } + } + + if (!ia_if) + return (EADDRNOTAVAIL); + + ia = ia_if; + ifp = ia->ia_ifp; + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || + (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) + return (EADDRNOTAVAIL); + + if (imo->imo_num_memberships == 0) { + addr.s_addr = htonl(INADDR_CARP_GROUP); + if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) + return (ENOBUFS); + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_ttl = CARP_DFLTTL; + imo->imo_multicast_loop = 0; + } + + if (!ifp->if_carp) { + + MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, + M_WAITOK|M_ZERO); + if (!cif) { + error = ENOBUFS; + goto cleanup; + } + if ((error = ifpromisc(ifp, 1))) { + FREE(cif, M_CARP); + goto cleanup; + } + + CARP_LOCK_INIT(cif); + CARP_LOCK(cif); + cif->vhif_ifp = ifp; + TAILQ_INIT(&cif->vhif_vrs); + ifp->if_carp = cif; + + } else { + struct carp_softc *vr; + + cif = (struct carp_if *)ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && vr->sc_vhid == sc->sc_vhid) { + CARP_UNLOCK(cif); + error = EINVAL; + goto cleanup; + } + } + sc->sc_ia = ia; + sc->sc_ifp = ifp; + + { /* XXX prevent endless loop if already in queue */ + struct carp_softc *vr, *after = NULL; + int myself = 0; + cif = (struct carp_if *)ifp->if_carp; + + /* XXX: cif should not change, right? 
So we still hold the lock */
+		CARP_LOCK_ASSERT(cif);
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	CARP_UNLOCK(cif);
+
+	sc->sc_naddrs++;
+	sc->sc_if.if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+	carp_set_state(sc, INIT);
+	carp_setrun(sc, 0);
+
+	return (0);
+
+cleanup:
+	in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+	return (error);
+}
+
+/*
+ * Account for the removal of one IPv4 address from the virtual host.
+ * When the last one goes away: stop the advertisement callout, mark the
+ * interface down, reset the vhid, leave the multicast group and unlink
+ * sc from the parent interface's carp_if, freeing the carp_if once it
+ * has no virtual hosts left.  Always returns 0; sin is not used.
+ */
+int
+carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	int error = 0;
+
+	if (!--sc->sc_naddrs) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp;
+		struct ip_moptions *imo = &sc->sc_imo;
+
+		callout_stop(&sc->sc_ad_tmo);
+		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		sc->sc_vhid = -1;
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+		CARP_LOCK(cif);
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			sc->sc_ifp->if_carp = NULL;
+			CARP_LOCK_DESTROY(cif);
+			/* cif is allocated with M_CARP in carp_set_addr() */
+			FREE(cif, M_CARP);
+		} else {
+			CARP_UNLOCK(cif);
+		}
+	}
+
+	return (error);
+}
+
+#ifdef INET6
+int
+carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct in6_ifaddr *ia, *ia_if;
+	struct ip6_moptions *im6o = &sc->sc_im6o;
+	struct in6_multi_mship *imm;
+	struct sockaddr_in6 addr;
+	int own, error;
+
+	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+		if (!(sc->sc_if.if_flags & IFF_UP))
+			carp_set_state(sc, INIT);
+		if (sc->sc_naddrs6)
+			sc->sc_if.if_flags |= IFF_UP;
+		carp_setrun(sc, 0);
+		return (0);
+	}
+
+	/* we have to do it by hands to check we won't match on us */
+	ia_if = NULL; own = 0;
+	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
+		int i;
+
+		
for (i = 0; i < 4; i++) { + if ((sin6->sin6_addr.s6_addr32[i] & + ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != + (ia->ia_addr.sin6_addr.s6_addr32[i] & + ia->ia_prefixmask.sin6_addr.s6_addr32[i])) + break; + } + /* and, yeah, we need a multicast-capable iface too */ + if (ia->ia_ifp != &sc->sc_ac.ac_if && + (ia->ia_ifp->if_flags & IFF_MULTICAST) && + (i == 4)) { + if (!ia_if) + ia_if = ia; + if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, + &ia->ia_addr.sin6_addr)) + own++; + } + } + + if (!ia_if) + return (EADDRNOTAVAIL); + ia = ia_if; + ifp = ia->ia_ifp; + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || + (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) + return (EADDRNOTAVAIL); + + if (!sc->sc_naddrs6) { + im6o->im6o_multicast_ifp = ifp; + + /* join CARP multicast address */ + bzero(&addr, sizeof(addr)); + addr.sin6_family = AF_INET6; + addr.sin6_len = sizeof(addr); + addr.sin6_addr.s6_addr16[0] = htons(0xff02); + addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + addr.sin6_addr.s6_addr8[15] = 0x12; + if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) + goto cleanup; + LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + + /* join solicited multicast address */ + bzero(&addr.sin6_addr, sizeof(addr.sin6_addr)); + addr.sin6_addr.s6_addr16[0] = htons(0xff02); + addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + addr.sin6_addr.s6_addr32[1] = 0; + addr.sin6_addr.s6_addr32[2] = htonl(1); + addr.sin6_addr.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; + addr.sin6_addr.s6_addr8[12] = 0xff; + if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL) + goto cleanup; + LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + } + + if (!ifp->if_carp) { + MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP, + M_WAITOK|M_ZERO); + if (!cif) { + error = ENOBUFS; + goto cleanup; + } + if ((error = ifpromisc(ifp, 1))) { + FREE(cif, M_CARP); + goto cleanup; + } + + CARP_LOCK_INIT(cif); + CARP_LOCK(cif); + cif->vhif_ifp = 
ifp; + TAILQ_INIT(&cif->vhif_vrs); + ifp->if_carp = cif; + + } else { + struct carp_softc *vr; + + cif = (struct carp_if *)ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && vr->sc_vhid == sc->sc_vhid) { + CARP_UNLOCK(cif); + error = EINVAL; + goto cleanup; + } + } + sc->sc_ia6 = ia; + sc->sc_ifp = ifp; + + { /* XXX prevent endless loop if already in queue */ + struct carp_softc *vr, *after = NULL; + int myself = 0; + cif = (struct carp_if *)ifp->if_carp; + CARP_LOCK_ASSERT(cif); + + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { + if (vr == sc) + myself = 1; + if (vr->sc_vhid < sc->sc_vhid) + after = vr; + } + + if (!myself) { + /* We're trying to keep things in order */ + if (after == NULL) { + TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); + } else { + TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); + } + cif->vhif_nvrs++; + } + } + + CARP_UNLOCK(cif); + sc->sc_naddrs6++; + sc->sc_ac.ac_if.if_flags |= IFF_UP; + if (own) + sc->sc_advskew = 0; + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + + return (0); + +cleanup: + /* clean up multicast memberships */ + if (!sc->sc_naddrs6) { + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + imm = LIST_FIRST(&im6o->im6o_memberships); + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + } + return (error); +} + +int +carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) +{ + int error = 0; + + if (!--sc->sc_naddrs6) { + struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp; + struct ip6_moptions *im6o = &sc->sc_im6o; + + callout_stop(&sc->sc_ad_tmo); + sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING); + sc->sc_vhid = -1; + CARP_LOCK(cif); + while (!LIST_EMPTY(&im6o->im6o_memberships)) { + struct in6_multi_mship *imm = + LIST_FIRST(&im6o->im6o_memberships); + + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + im6o->im6o_multicast_ifp = NULL; + TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); + if (!--cif->vhif_nvrs) { + CARP_LOCK_DESTROY(cif); + 
sc->sc_ifp->if_carp = NULL;
+			/* cif is allocated with M_CARP in carp_set_addr6() */
+			FREE(cif, M_CARP);
+		} else
+			CARP_UNLOCK(cif);
+	}
+
+	return (error);
+}
+#endif /* INET6 */
+
+/*
+ * ioctl handler for the carp interface.  addr is interpreted as a
+ * struct ifaddr, struct ifaliasreq or struct ifreq depending on cmd.
+ * Handles address add/delete (SIOCSIFADDR, SIOCAIFADDR, SIOCDIFADDR),
+ * interface flag changes (SIOCSIFFLAGS) and the CARP-specific
+ * SIOCSVH/SIOCGVH requests.  Returns 0 or an errno value.
+ */
+int
+carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+	struct carp_softc *sc = ifp->if_softc, *vr;
+	struct carpreq carpr;
+	struct ifaddr *ifa;
+	struct ifreq *ifr;
+	struct ifaliasreq *ifra;
+	int error = 0;
+
+	ifa = (struct ifaddr *)addr;
+	ifra = (struct ifaliasreq *)addr;
+	ifr = (struct ifreq *)addr;
+
+	switch (cmd) {
+	case SIOCSIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			sc->sc_if.if_flags |= IFF_UP;
+			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+			    sizeof(struct sockaddr));
+			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			sc->sc_if.if_flags |= IFF_UP;
+			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCAIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			sc->sc_if.if_flags |= IFF_UP;
+			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+			    sizeof(struct sockaddr));
+			error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			sc->sc_if.if_flags |= IFF_UP;
+			error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCDIFADDR:
+		/* interface flags live in the embedded ifnet (sc_if) */
+		sc->sc_if.if_flags &= ~IFF_UP;
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCSIFFLAGS:
+		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
+			sc->sc_if.if_flags &= ~IFF_UP;
+			callout_stop(&sc->sc_ad_tmo);
+			callout_stop(&sc->sc_md_tmo);
+			
callout_stop(&sc->sc_md6_tmo); + if (sc->sc_state == MASTER) + carp_send_ad(sc); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { + sc->sc_if.if_flags |= IFF_UP; + carp_setrun(sc, 0); + } + break; + + case SIOCSVH: + if ((error = suser(curthread)) != 0) + break; + if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) + break; + error = 1; + if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { + switch (carpr.carpr_state) { + case BACKUP: + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP); + carp_setrun(sc, 0); + carp_setroute(sc, RTM_DELETE); + break; + case MASTER: + carp_master_down(sc); + break; + default: + break; + } + } + if (carpr.carpr_vhid > 0) { + if (carpr.carpr_vhid > 255) { + error = EINVAL; + break; + } + if (sc->sc_ifp) { + struct carp_if *cif; + cif = (struct carp_if *)sc->sc_ifp->if_carp; + CARP_LOCK(cif); + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) + if (vr != sc && + vr->sc_vhid == carpr.carpr_vhid) { + CARP_UNLOCK(cif); + return EINVAL; + } + CARP_UNLOCK(cif); + } + sc->sc_vhid = carpr.carpr_vhid; + sc->sc_ac.ac_enaddr[0] = 0; + sc->sc_ac.ac_enaddr[1] = 0; + sc->sc_ac.ac_enaddr[2] = 0x5e; + sc->sc_ac.ac_enaddr[3] = 0; + sc->sc_ac.ac_enaddr[4] = 1; + sc->sc_ac.ac_enaddr[5] = sc->sc_vhid; + error--; + } + if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { + if (carpr.carpr_advskew >= 255) { + error = EINVAL; + break; + } + if (carpr.carpr_advbase > 255) { + error = EINVAL; + break; + } + sc->sc_advbase = carpr.carpr_advbase; + sc->sc_advskew = carpr.carpr_advskew; + error--; + } + bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); + if (error > 0) + error = EINVAL; + else { + error = 0; + carp_setrun(sc, 0); + } + break; + + case SIOCGVH: + bzero(&carpr, sizeof(carpr)); + carpr.carpr_state = sc->sc_state; + carpr.carpr_vhid = sc->sc_vhid; + carpr.carpr_advbase = sc->sc_advbase; + carpr.carpr_advskew = sc->sc_advskew; + if (suser(curthread) == 
0) + bcopy(sc->sc_key, carpr.carpr_key, + sizeof(carpr.carpr_key)); + error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); + break; + + default: + error = EINVAL; + } + + carp_hmac_prepare(sc); + return (error); +} + +/* + * XXX: this is looutput. We should eventually use it from there. + */ +static int +carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ /* hand m to if_simloop() on ifp and return its result; m is freed on both early-return paths below */ + M_ASSERTPKTHDR(m); /* check if we have the packet header */ + + if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { /* the route asks for the packet to be dropped */ + m_freem(m); + return (rt->rt_flags & RTF_BLACKHOLE ? 0 : /* blackhole: drop but report success */ + rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); /* reject: error depends on host vs. network route */ + } + + ifp->if_opackets++; /* counters are bumped before the family check, so packets rejected below are counted too */ + ifp->if_obytes += m->m_pkthdr.len; +#if 1 /* XXX */ + switch (dst->sa_family) { /* only the families listed here reach if_simloop() */ + case AF_INET: + case AF_INET6: + case AF_IPX: + case AF_APPLETALK: + break; + default: + printf("carp_looutput: af=%d unexpected\n", dst->sa_family); + m_freem(m); + return (EAFNOSUPPORT); + } +#endif + return(if_simloop(ifp, m, dst->sa_family, 0)); /* the if_simloop() result is returned unchanged */ +} + +/* + * Start output on carp interface. This function should never be called. 
+ */ +void +carp_start(struct ifnet *ifp) +{ /* nothing to transmit here; the call is only logged when built with DEBUG */ +#ifdef DEBUG + printf("%s: start called\n", ifp->if_xname); +#endif +} + +int +carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) +{ /* overwrite the link-layer source address of a CARP-tagged packet; returns 0, or EOPNOTSUPP for an if_type not handled below */ + struct m_tag *mtag; + struct carp_softc *sc; + struct ifnet *carp_ifp; + + if (!sa) /* no destination address: leave the packet alone */ + return (0); + + switch (sa->sa_family) { /* only AF_INET and AF_INET6, when compiled in, go further */ +#ifdef INET + case AF_INET: + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + break; +#endif /* INET6 */ + default: + return (0); + } + + mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); + if (mtag == NULL) /* packets without a CARP tag are not modified */ + return (0); + + bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); /* the bytes right after the m_tag header are read as an ifnet pointer */ + sc = carp_ifp->if_softc; + + /* Set the source MAC address to Virtual Router MAC Address */ + switch (ifp->if_type) { + case IFT_ETHER: { + struct ether_header *eh; + + eh = mtod(m, struct ether_header *); /* NOTE(review): presumes the link-layer header is already at the start of m; no length check is visible here - confirm */ + eh->ether_shost[0] = 0; + eh->ether_shost[1] = 0; + eh->ether_shost[2] = 0x5e; + eh->ether_shost[3] = 0; + eh->ether_shost[4] = 1; + eh->ether_shost[5] = sc->sc_vhid; /* resulting address is 00:00:5e:00:01:vhid */ + } + break; + case IFT_FDDI: { + struct fddi_header *fh; + + fh = mtod(m, struct fddi_header *); + fh->fddi_shost[0] = 0; + fh->fddi_shost[1] = 0; + fh->fddi_shost[2] = 0x5e; + fh->fddi_shost[3] = 0; + fh->fddi_shost[4] = 1; + fh->fddi_shost[5] = sc->sc_vhid; /* same byte values as the Ethernet case */ + } + break; + case IFT_ISO88025: { + struct iso88025_header *th; + th = mtod(m, struct iso88025_header *); + th->iso88025_shost[0] = 3; + th->iso88025_shost[1] = 0; + th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); /* NOTE(review): the shift count is sc_vhid - 1; it is negative for vhid 0 and can exceed the width of int for large vhids (SIOCSVH accepts up to 255), both undefined in C - confirm the vhid range intended for token ring */ + th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); /* NOTE(review): same shift caveat; presumably only the low byte of the result is stored - verify against struct iso88025_header */ + th->iso88025_shost[4] = 0; + th->iso88025_shost[5] = 0; + } + break; + default: + printf("%s: carp is not supported for this interface type\n", + ifp->if_xname); + return (EOPNOTSUPP); /* unlike the early exits above this reports an error; m is not freed in this function */ + } + + return (0); +} + +void +carp_set_state(struct carp_softc *sc, int state) +{ /* store the new state and mirror it into the interface link state */ + if (sc->sc_state == state) /* unchanged: nothing to update */ + return; + + sc->sc_state = state; + switch (state) { + case BACKUP: + sc->sc_ac.ac_if.if_link_state = LINK_STATE_DOWN; + break; + case MASTER: +
sc->sc_ac.ac_if.if_link_state = LINK_STATE_UP; + break; + default: + sc->sc_ac.ac_if.if_link_state = LINK_STATE_UNKNOWN; + break; + } + rt_ifmsg(&sc->sc_ac.ac_if); /* called with the carp interface after its link state was updated */ +} + +void +carp_carpdev_state(void *v) +{ /* v is a struct carp_if; every softc on its vhif_vrs list is updated from the state of its sc_ifp, under CARP_LOCK */ + struct carp_if *cif = v; + struct carp_softc *sc; + + CARP_LOCK(cif); + TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { + if (sc->sc_ifp->if_link_state == LINK_STATE_DOWN || + !(sc->sc_ifp->if_flags & IFF_UP)) { /* sc_ifp has no link or is administratively down */ + sc->sc_flags_backup = sc->sc_if.if_flags; /* NOTE(review): overwritten on every call while sc_ifp stays down, so a second call saves flags that already lack IFF_UP|IFF_RUNNING - confirm this is intended */ + sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + if (!sc->sc_suppress) { /* count each softc only once in carp_suppress_preempt */ + carp_suppress_preempt++; + if (carp_suppress_preempt == 1) /* counter just left zero */ + carp_send_ad_all(); + } + sc->sc_suppress = 1; + } else { /* sc_ifp is usable: put back the saved flags and restart from INIT */ + sc->sc_if.if_flags |= sc->sc_flags_backup; + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + if (sc->sc_suppress) /* undo the increment made when this softc went down */ + carp_suppress_preempt--; + sc->sc_suppress = 0; + } + } + CARP_UNLOCK(cif); +} + +static int +carp_modevent(module_t mod, int type, void *data) +{ /* module event handler: handles MOD_LOAD and MOD_UNLOAD, any other type returns EINVAL */ + int error = 0; + + switch (type) { + case MOD_LOAD: + LIST_INIT(&carpif_list); + if_clone_attach(&carp_cloner); + printf("carp: attached\n"); + break; + + case MOD_UNLOAD: + if_clone_detach(&carp_cloner); + while (!LIST_EMPTY(&carpif_list)) /* presumably carp_clone_destroy() unlinks the entry from carpif_list, otherwise this never ends - TODO confirm */ + carp_clone_destroy( + &LIST_FIRST(&carpif_list)->sc_if); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +static moduledata_t carp_mod = { + "carp", + carp_modevent, + 0 +}; + +DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h new file mode 100644 index 000000000000..a050a88e3991 --- /dev/null +++ b/sys/netinet/ip_carp.h @@ -0,0 +1,163 @@ +/* $FreeBSD$ */ +/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */ + +/* + * Copyright (c) 2002 Michael Shalayeff. All rights reserved. + * Copyright (c) 2003 Ryan McBride. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _IP_CARP_H +#define _IP_CARP_H + +/* + * The CARP header layout is as follows: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | VirtualHostID | AdvSkew | Auth Len | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | AdvBase | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Counter (1) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Counter (2) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (1) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (2) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (3) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (4) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | SHA-1 HMAC (5) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct carp_header { +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t carp_type:4, + carp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_int8_t carp_version:4, + carp_type:4; +#endif + u_int8_t carp_vhid; /* virtual host id */ + u_int8_t carp_advskew; /* advertisement skew */ + u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */ + u_int8_t carp_pad1; /* reserved */ + u_int8_t carp_advbase; /* advertisement interval */ + u_int16_t carp_cksum; + u_int32_t carp_counter[2]; + unsigned char carp_md[20]; /* SHA1 HMAC */ +} __packed; + +#define CARP_DFLTTL 255 + +/* carp_version */ +#define CARP_VERSION 2 + +/* carp_type */ +#define CARP_ADVERTISEMENT 0x01 + +#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */ + +/* carp_advbase */ +#define CARP_DFLTINTV 1 + +/* + * Statistics. 
+ */ +struct carpstats { /* NOTE(review): netstat copies this whole struct through the net.inet.carp.stats sysctl, so the layout is shared with userland - confirm before reordering fields */ + uint64_t carps_ipackets; /* total input packets, IPv4 */ + uint64_t carps_ipackets6; /* total input packets, IPv6 */ + uint64_t carps_badif; /* wrong interface */ + uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */ + uint64_t carps_hdrops; /* packets shorter than hdr */ + uint64_t carps_badsum; /* bad checksum */ + uint64_t carps_badver; /* bad (incl unsupp) version */ + uint64_t carps_badlen; /* data length does not match */ + uint64_t carps_badauth; /* bad authentication */ + uint64_t carps_badvhid; /* bad VHID */ + uint64_t carps_badaddrs; /* bad address list */ + + uint64_t carps_opackets; /* total output packets, IPv4 */ + uint64_t carps_opackets6; /* total output packets, IPv6 */ + uint64_t carps_onomem; /* no memory for an mbuf */ + uint64_t carps_ostates; /* total state updates sent */ + + uint64_t carps_preempt; /* if enabled, preemptions */ +}; + +/* + * Configuration structure for SIOCSVH SIOCGVH + */ +struct carpreq { + int carpr_state; /* index into the CARP_STATES names; SIOCGVH reports sc_state here */ +#define CARP_STATES "INIT", "BACKUP", "MASTER" /* state names, listed in carpr_state order */ +#define CARP_MAXSTATE 2 /* highest carpr_state that has a name in CARP_STATES */ + int carpr_vhid; /* SIOCSVH: applied only when above 0, values above 255 give EINVAL */ + int carpr_advskew; /* SIOCSVH: values of 255 or more give EINVAL */ + int carpr_advbase; /* SIOCSVH: values above 255 give EINVAL */ + unsigned char carpr_key[CARP_KEY_LEN]; /* SIOCGVH copies the key out only when suser() succeeds, otherwise it stays zeroed */ +}; +#define SIOCSVH _IOWR('i', 245, struct ifreq) /* set: ifr_data points at a struct carpreq */ +#define SIOCGVH _IOWR('i', 246, struct ifreq) /* get: ifr_data points at a struct carpreq */ + +/* + * Names for CARP sysctl objects + */ +#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */ +#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */ +#define CARPCTL_LOG 3 /* log bad packets */ +#define CARPCTL_STATS 4 /* statistics (read-only) */ +#define CARPCTL_ARPBALANCE 5 /* balance arp responses */ +#define CARPCTL_MAXID 6 /* one past the last CARPCTL_ id above */ + +#define CARPCTL_NAMES { \ + { 0, 0 }, \ + { "allow", CTLTYPE_INT }, \ + { "preempt", CTLTYPE_INT }, \ + { "log", CTLTYPE_INT }, \ + { "stats", CTLTYPE_STRUCT }, \ + { "arpbalance", CTLTYPE_INT }, \ +} + +#ifdef _KERNEL +void carp_ifdetach (struct ifnet *); +void carp_carpdev_state(void *); +void carp_input (struct mbuf *, int); +int carp6_input (struct mbuf **, int *, int); 
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +int carp_iamatch (void *, struct in_ifaddr *, struct in_addr *, + u_int8_t **); +struct ifaddr *carp_iamatch6(void *, struct in6_addr *); +void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *); +struct ifnet *carp_forus (void *, void *); +#endif +#endif /* _IP_CARP_H */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 3021285bfde1..ecf79aef7041 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -35,6 +35,7 @@ #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_mac.h" +#include "opt_carp.h" #include #include @@ -66,6 +67,9 @@ #include #include #include +#ifdef DEV_CARP +#include +#endif #include @@ -509,10 +513,17 @@ passin: * XXX - Checking is incompatible with IP aliases added * to the loopback interface instead of the interface where * the packets are received. + * + * XXX - This is the case for carp vhost IPs as well so we + * insert a workaround. If the packet got here, we already + * checked with carp_iamatch() and carp_forus(). */ checkif = ip_checkinterface && (ipforwarding == 0) && m->m_pkthdr.rcvif != NULL && ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) && +#ifdef DEV_CARP + !m->m_pkthdr.rcvif->if_carp && +#endif (dchg == 0); /* diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 163dbfe8b245..7bd5e6464ada 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -206,7 +206,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) * rely on the cloning mechanism from the corresponding interface route * any more. */ -static void +void in6_ifaddloop(struct ifaddr *ifa) { struct rtentry *rt; @@ -226,7 +226,7 @@ in6_ifaddloop(struct ifaddr *ifa) * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(), * if it exists. 
*/ -static void +void in6_ifremloop(struct ifaddr *ifa) { struct in6_ifaddr *ia; @@ -1551,6 +1551,39 @@ in6_ifinit(ifp, ia, sin6, newhost) return (error); } +struct in6_multi_mship * +in6_joingroup(ifp, addr, errorp) + struct ifnet *ifp; + struct in6_addr *addr; + int *errorp; +{ /* join multicast group addr on ifp; returns a malloc()ed membership record, or NULL on failure */ + struct in6_multi_mship *imm; + + imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT); /* M_NOWAIT: the result is checked for NULL just below */ + if (!imm) { + *errorp = ENOBUFS; + return NULL; + } + imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp); + if (!imm->i6mm_maddr) { + /* *errorp is already set */ + free(imm, M_IPMADDR); + return NULL; + } + return imm; +} + +int +in6_leavegroup(imm) + struct in6_multi_mship *imm; +{ /* drop the group membership held by imm, if any, and free the record; always returns 0 */ + + if (imm->i6mm_maddr) + in6_delmulti(imm->i6mm_maddr); + free(imm, M_IPMADDR); + return 0; +} + /* * Find an IPv6 interface link-local address specific to an interface. */ diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index fe12e4bf1f34..8837a91ee375 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -671,6 +671,7 @@ in6_ifattach(ifp, altifp) #endif case IFT_PFLOG: case IFT_PFSYNC: + case IFT_CARP: return; } diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 129c5f3a57f1..790030383669 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -64,6 +64,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_carp.h" #include #include @@ -121,6 +122,10 @@ #endif #endif /* IPSEC */ +#ifdef DEV_CARP +#include +#endif + #ifdef FAST_IPSEC #include #define IPSEC @@ -241,6 +246,14 @@ struct ip6protosw inet6sw[] = { 0, 0, 0, 0, &rip6_usrreqs }, +#ifdef DEV_CARP +{ SOCK_RAW, &inet6domain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, + carp6_input, rip6_output, 0, rip6_ctloutput, + 0, + 0, 0, 0, 0, + &rip6_usrreqs +}, +#endif /* DEV_CARP */ /* raw wildcard */ { SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR, rip6_input, rip6_output, 0, rip6_ctloutput, diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index 
c89a9b59b05c..05ec3d591816 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -578,6 +578,8 @@ do { \ struct in6_multi *in6_addmulti __P((struct in6_addr *, struct ifnet *, int *)); void in6_delmulti __P((struct in6_multi *)); +struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *); +int in6_leavegroup(struct in6_multi_mship *); int in6_mask2len __P((struct in6_addr *, u_char *)); int in6_control __P((struct socket *, u_long, caddr_t, struct ifnet *, struct thread *)); @@ -604,6 +606,8 @@ int in6_prefix_ioctl __P((struct socket *, u_long, caddr_t, int in6_prefix_add_ifid __P((int, struct in6_ifaddr *)); void in6_prefix_remove_ifid __P((int, struct in6_ifaddr *)); void in6_purgeprefix __P((struct ifnet *)); +void in6_ifremloop(struct ifaddr *); +void in6_ifaddloop(struct ifaddr *); int in6_is_addr_deprecated __P((struct sockaddr_in6 *)); struct inpcb; diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 2c639b0761e1..e274966d4b0e 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -2024,6 +2024,9 @@ nd6_need_cache(ifp) #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: +#endif +#ifdef IFT_CARP + case IFT_CARP: #endif case IFT_GIF: /* XXX need more cases? 
*/ return (1); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 3abcf36faf7c..e428bd42aef6 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -32,6 +32,8 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" +#include "opt_carp.h" #include #include @@ -59,6 +61,10 @@ #include #include +#ifdef DEV_CARP +#include +#endif + #include #define SDL(s) ((struct sockaddr_dl *)s) @@ -94,7 +100,7 @@ nd6_ns_input(m, off, icmp6len) struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; @@ -193,7 +199,14 @@ nd6_ns_input(m, off, icmp6len) * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ +#ifdef DEV_CARP + if (ifp->if_carp) + ifa = carp_iamatch6(ifp->if_carp, &taddr6); + if (!ifa) + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); +#else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); +#endif /* (2) check. */ if (!ifa) { @@ -888,9 +901,16 @@ nd6_na_output(ifp, daddr6, taddr6, flags, tlladdr, sdl0) * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. 
*/ - if (sdl0 == NULL) + if (sdl0 == NULL) { +#ifdef DEV_CARP + if (ifp->if_carp) + mac = carp_macmatch6(ifp->if_carp, m, taddr6); + if (mac == NULL) + mac = nd6_ifptomac(ifp); +#else mac = nd6_ifptomac(ifp); - else if (sdl0->sa_family == AF_LINK) { +#endif + } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) @@ -942,6 +962,9 @@ nd6_ifptomac(ifp) #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: +#endif +#ifdef IFT_CARP + case IFT_CARP: #endif case IFT_ISO88025: return ((caddr_t)(ifp + 1)); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index c954f92088af..1621fa716252 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -664,6 +664,7 @@ struct mbuf *m_uiotombuf(struct uio *, int, int); #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_PF_TRANSLATE_LOCALHOST 26 /* PF translate localhost */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ +#define PACKET_TAG_CARP 28 /* CARP info */ /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index ea679f81af7a..6590848e904b 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef INET6 #include #endif /* INET6 */ @@ -525,6 +526,50 @@ udp_stats(u_long off __unused, const char *name, int af1 __unused) #undef p1a } +/* + * Dump CARP statistics structure. + */ +void +carp_stats(u_long off, const char *name, int af1 __unused) +{ + struct carpstats carpstat, zerostat; + size_t len = sizeof(struct carpstats); + + if (zflag) + memset(&zerostat, 0, len); + if (sysctlbyname("net.inet.carp.stats", &carpstat, &len, + zflag ? &zerostat : NULL, zflag ? 
len : 0) < 0) { + warn("sysctl: net.inet.carp.stats"); /* the sysctl failed: nothing is printed for carp */ + return; + } + + printf("%s:\n", name); + +#define p(f, m) if (carpstat.f || sflag <= 1) /* zero counters are skipped once sflag exceeds 1 */ \ + printf(m, (unsigned long long)carpstat.f, plural((int)carpstat.f)) /* NOTE(review): the (int) cast truncates the 64-bit counter before plural() sees it */ +#define p2(f, m) if (carpstat.f || sflag <= 1) \ + printf(m, (unsigned long long)carpstat.f) /* p2: same as p for messages that take no plural suffix */ + + p(carps_ipackets, "\t%llu packet%s received (IPv4)\n"); + p(carps_ipackets6, "\t%llu packet%s received (IPv6)\n"); + p(carps_badttl, "\t\t%llu packet%s discarded for wrong TTL\n"); + p(carps_hdrops, "\t\t%llu packet%s shorter than header\n"); + p(carps_badsum, "\t\t%llu discarded for bad checksum%s\n"); + p(carps_badver, "\t\t%llu discarded packet%s with a bad version\n"); + p2(carps_badlen, "\t\t%llu discarded because packet too short\n"); + p2(carps_badauth, "\t\t%llu discarded for bad authentication\n"); + p2(carps_badvhid, "\t\t%llu discarded for bad vhid\n"); + p2(carps_badaddrs, "\t\t%llu discarded because of a bad address list\n"); + p(carps_opackets, "\t%llu packet%s sent (IPv4)\n"); + p(carps_opackets6, "\t%llu packet%s sent (IPv6)\n"); + p2(carps_onomem, "\t\t%llu send failed due to mbuf memory error\n"); /* NOTE(review): carps_badif and carps_preempt from struct carpstats are never printed here */ +#if notyet /* NOTE(review): the first conversion below is %s but p() passes an unsigned long long; it needs %llu before this can be enabled */ + p(carps_ostates, "\t\t%s state update%s sent\n"); +#endif +#undef p +#undef p2 +} + /* * Dump IP statistics structure. */ diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c index e5cd2fdfa6a2..fc32e82acfa6 100644 --- a/usr.bin/netstat/main.c +++ b/usr.bin/netstat/main.c @@ -136,6 +136,8 @@ static struct nlist nl[] = { { "_mbuf_lowm" }, #define N_CLLO 32 { "_clust_lowm" }, +#define N_CARPSTAT 33 + { "_carpstats" }, { "" }, }; @@ -171,6 +173,8 @@ struct protox { bdg_stats, NULL, "bdg", 1 /* bridging... 
*/ }, { -1, -1, 1, protopr, pim_stats, NULL, "pim", IPPROTO_PIM }, + { -1, N_CARPSTAT, 1, 0, + carp_stats, NULL, "carp", 0}, { -1, -1, 0, NULL, NULL, NULL, NULL, 0 } }; diff --git a/usr.bin/netstat/netstat.h b/usr.bin/netstat/netstat.h index e2b3f291a6e5..3cafa26de4d2 100644 --- a/usr.bin/netstat/netstat.h +++ b/usr.bin/netstat/netstat.h @@ -71,6 +71,7 @@ void ip_stats(u_long, const char *, int); void icmp_stats(u_long, const char *, int); void igmp_stats(u_long, const char *, int); void pim_stats(u_long, const char *, int); +void carp_stats (u_long, const char *, int); #ifdef IPSEC void ipsec_stats(u_long, const char *, int); #endif