src/sys/netinet/ip_mroute.c

1362 lines
32 KiB
C

/* $OpenBSD: ip_mroute.c,v 1.142 2024/04/06 14:23:27 bluhm Exp $ */
/* $NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $ */
/*
* Copyright (c) 1989 Stephen Deering
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Stephen Deering of Stanford University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
*/
/*
* IP multicast forwarding procedures
*
* Written by David Waitzman, BBN Labs, August 1988.
* Modified by Steve Deering, Stanford, February 1989.
* Modified by Mark J. Steiglitz, Stanford, May, 1991
* Modified by Van Jacobson, LBL, January 1993
* Modified by Ajit Thyagarajan, PARC, August 1993
* Modified by Bill Fenner, PARC, April 1994
* Modified by Charles M. Hannum, NetBSD, May 1995.
* Modified by Ahmed Helmy, SGI, June 1996
* Modified by George Edmond Eddy (Rusty), ISI, February 1998
* Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
* Modified by Hitoshi Asaeda, WIDE, August 2000
* Modified by Pavlin Radoslavov, ICSI, October 2002
*
* MROUTING Revision: 1.2
* advanced API support, bandwidth metering and signaling
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/ioctl.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/igmp.h>
#include <netinet/ip_mroute.h>
/* #define MCAST_DEBUG */
#ifdef MCAST_DEBUG
int mcast_debug = 1;
#define DPRINTF(fmt, args...) \
do { \
if (mcast_debug) \
printf("%s:%d " fmt "\n", \
__func__, __LINE__, ## args); \
} while (0)
#else
#define DPRINTF(fmt, args...) \
do { } while (0)
#endif
/*
* Globals. All but ip_mrouter and ip_mrtproto could be static,
* except for netstat or debugging purposes.
*/
struct socket *ip_mrouter[RT_TABLEID_MAX + 1];
struct rttimer_queue ip_mrouterq;
uint64_t mrt_count[RT_TABLEID_MAX + 1];
int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
struct mrtstat mrtstat;
struct rtentry *mfc_find(struct ifnet *, struct in_addr *,
struct in_addr *, unsigned int);
int get_sg_cnt(unsigned int, struct sioc_sg_req *);
int get_vif_cnt(unsigned int, struct sioc_vif_req *);
int mrt_rtwalk_mfcsysctl(struct rtentry *, void *, unsigned int);
int ip_mrouter_init(struct socket *, struct mbuf *);
int mrouter_rtwalk_delete(struct rtentry *, void *, unsigned int);
int get_version(struct mbuf *);
int add_vif(struct socket *, struct mbuf *);
int del_vif(struct socket *, struct mbuf *);
void update_mfc_params(struct mfcctl2 *, int, unsigned int);
int mfc_add(struct mfcctl2 *, struct in_addr *, struct in_addr *,
int, unsigned int, int);
int add_mfc(struct socket *, struct mbuf *);
int del_mfc(struct socket *, struct mbuf *);
int set_api_config(struct socket *, struct mbuf *); /* chose API capabilities */
int get_api_support(struct mbuf *);
int get_api_config(struct mbuf *);
int socket_send(struct socket *, struct mbuf *,
struct sockaddr_in *);
int ip_mdq(struct mbuf *, struct ifnet *, struct rtentry *);
struct ifnet *if_lookupbyvif(vifi_t, unsigned int);
struct rtentry *rt_mcast_add(struct ifnet *, struct sockaddr *,
struct sockaddr *);
void mrt_mcast_del(struct rtentry *, unsigned int);
/*
* Kernel multicast routing API capabilities and setup.
* If more API capabilities are added to the kernel, they should be
* recorded in `mrt_api_support'.
*/
static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
MRT_MFC_RP);
static u_int32_t mrt_api_config = 0;
/*
* Find a route for a given origin IP address and Multicast group address
* Type of service parameter to be added in the future!!!
* Statistics are updated by the caller if needed
* (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
*/
struct rtentry *
mfc_find(struct ifnet *ifp, struct in_addr *origin, struct in_addr *group,
unsigned int rtableid)
{
struct rtentry *rt;
struct sockaddr_in msin;
memset(&msin, 0, sizeof(msin));
msin.sin_len = sizeof(msin);
msin.sin_family = AF_INET;
msin.sin_addr = *group;
rt = rtalloc(sintosa(&msin), 0, rtableid);
do {
if (!rtisvalid(rt)) {
rtfree(rt);
return NULL;
}
/* Don't consider non multicast routes. */
if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
(RTF_HOST | RTF_MULTICAST))
continue;
/* Return first occurrence if interface is not specified. */
if (ifp == NULL)
return (rt);
if (rt->rt_ifidx == ifp->if_index)
return (rt);
} while ((rt = rtable_iterate(rt)) != NULL);
return (NULL);
}
/*
* Handle MRT setsockopt commands to modify the multicast routing tables.
*/
int
ip_mrouter_set(struct socket *so, int optname, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
int error;
if (optname != MRT_INIT &&
so != ip_mrouter[inp->inp_rtableid])
error = ENOPROTOOPT;
else
switch (optname) {
case MRT_INIT:
error = ip_mrouter_init(so, m);
break;
case MRT_DONE:
error = ip_mrouter_done(so);
break;
case MRT_ADD_VIF:
error = add_vif(so, m);
break;
case MRT_DEL_VIF:
error = del_vif(so, m);
break;
case MRT_ADD_MFC:
error = add_mfc(so, m);
break;
case MRT_DEL_MFC:
error = del_mfc(so, m);
break;
case MRT_API_CONFIG:
error = set_api_config(so, m);
break;
default:
error = ENOPROTOOPT;
break;
}
return (error);
}
/*
* Handle MRT getsockopt commands
*/
int
ip_mrouter_get(struct socket *so, int optname, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
int error;
if (so != ip_mrouter[inp->inp_rtableid])
error = ENOPROTOOPT;
else {
switch (optname) {
case MRT_VERSION:
error = get_version(m);
break;
case MRT_API_SUPPORT:
error = get_api_support(m);
break;
case MRT_API_CONFIG:
error = get_api_config(m);
break;
default:
error = ENOPROTOOPT;
break;
}
}
return (error);
}
/*
* Handle ioctl commands to obtain information from the cache
*/
int
mrt_ioctl(struct socket *so, u_long cmd, caddr_t data)
{
struct inpcb *inp = sotoinpcb(so);
int error;
if (inp == NULL)
return (ENOTCONN);
KERNEL_LOCK();
if (so != ip_mrouter[inp->inp_rtableid])
error = EINVAL;
else
switch (cmd) {
case SIOCGETVIFCNT:
NET_LOCK_SHARED();
error = get_vif_cnt(inp->inp_rtableid,
(struct sioc_vif_req *)data);
NET_UNLOCK_SHARED();
break;
case SIOCGETSGCNT:
NET_LOCK_SHARED();
error = get_sg_cnt(inp->inp_rtableid,
(struct sioc_sg_req *)data);
NET_UNLOCK_SHARED();
break;
default:
error = ENOTTY;
break;
}
KERNEL_UNLOCK();
return (error);
}
/*
* returns the packet, byte, rpf-failure count for the source group provided
*/
int
get_sg_cnt(unsigned int rtableid, struct sioc_sg_req *req)
{
struct rtentry *rt;
struct mfc *mfc;
rt = mfc_find(NULL, &req->src, &req->grp, rtableid);
if (rt == NULL) {
req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
return (EADDRNOTAVAIL);
}
req->pktcnt = req->bytecnt = req->wrong_if = 0;
do {
/* Don't consider non multicast routes. */
if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
(RTF_HOST | RTF_MULTICAST))
continue;
mfc = (struct mfc *)rt->rt_llinfo;
if (mfc == NULL)
continue;
req->pktcnt += mfc->mfc_pkt_cnt;
req->bytecnt += mfc->mfc_byte_cnt;
req->wrong_if += mfc->mfc_wrong_if;
} while ((rt = rtable_iterate(rt)) != NULL);
return (0);
}
/*
* returns the input and output packet and byte counts on the vif provided
*/
int
get_vif_cnt(unsigned int rtableid, struct sioc_vif_req *req)
{
struct ifnet *ifp;
struct vif *v;
vifi_t vifi = req->vifi;
if ((ifp = if_lookupbyvif(vifi, rtableid)) == NULL)
return (EINVAL);
v = (struct vif *)ifp->if_mcast;
req->icount = v->v_pkt_in;
req->ocount = v->v_pkt_out;
req->ibytes = v->v_bytes_in;
req->obytes = v->v_bytes_out;
return (0);
}
int
mrt_sysctl_vif(void *oldp, size_t *oldlenp)
{
caddr_t where = oldp;
size_t needed, given;
struct ifnet *ifp;
struct vif *vifp;
struct vifinfo vinfo;
given = *oldlenp;
needed = 0;
memset(&vinfo, 0, sizeof vinfo);
TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
if ((vifp = (struct vif *)ifp->if_mcast) == NULL)
continue;
vinfo.v_vifi = vifp->v_id;
vinfo.v_flags = vifp->v_flags;
vinfo.v_threshold = vifp->v_threshold;
vinfo.v_lcl_addr = vifp->v_lcl_addr;
vinfo.v_rmt_addr = vifp->v_rmt_addr;
vinfo.v_pkt_in = vifp->v_pkt_in;
vinfo.v_pkt_out = vifp->v_pkt_out;
vinfo.v_bytes_in = vifp->v_bytes_in;
vinfo.v_bytes_out = vifp->v_bytes_out;
needed += sizeof(vinfo);
if (where && needed <= given) {
int error;
error = copyout(&vinfo, where, sizeof(vinfo));
if (error)
return (error);
where += sizeof(vinfo);
}
}
if (where) {
*oldlenp = needed;
if (given < needed)
return (ENOMEM);
} else
*oldlenp = (11 * needed) / 10;
return (0);
}
struct mfcsysctlarg {
struct mfcinfo *msa_minfos;
size_t msa_len;
size_t msa_needed;
};
int
mrt_rtwalk_mfcsysctl(struct rtentry *rt, void *arg, unsigned int rtableid)
{
struct mfc *mfc = (struct mfc *)rt->rt_llinfo;
struct mfcsysctlarg *msa = (struct mfcsysctlarg *)arg;
struct ifnet *ifp;
struct vif *v;
struct mfcinfo *minfo;
int new = 0;
/* Skip entries being removed. */
if (mfc == NULL)
return (0);
/* Skip non-multicast routes. */
if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
(RTF_HOST | RTF_MULTICAST))
return (0);
/* User just asked for the output size. */
if (msa->msa_minfos == NULL) {
msa->msa_needed += sizeof(*minfo);
return (0);
}
/* Skip route with invalid interfaces. */
if ((ifp = if_get(rt->rt_ifidx)) == NULL)
return (0);
if ((v = (struct vif *)ifp->if_mcast) == NULL) {
if_put(ifp);
return (0);
}
for (minfo = msa->msa_minfos;
(uint8_t *)(minfo + 1) <=
(uint8_t *)msa->msa_minfos + msa->msa_len;
minfo++) {
/* Find a new entry or update old entry. */
if (minfo->mfc_origin.s_addr !=
satosin(rt->rt_gateway)->sin_addr.s_addr ||
minfo->mfc_mcastgrp.s_addr !=
satosin(rt_key(rt))->sin_addr.s_addr) {
if (minfo->mfc_origin.s_addr != 0 ||
minfo->mfc_mcastgrp.s_addr != 0)
continue;
new = 1;
}
minfo->mfc_origin = satosin(rt->rt_gateway)->sin_addr;
minfo->mfc_mcastgrp = satosin(rt_key(rt))->sin_addr;
minfo->mfc_parent = mfc->mfc_parent;
minfo->mfc_pkt_cnt += mfc->mfc_pkt_cnt;
minfo->mfc_byte_cnt += mfc->mfc_byte_cnt;
minfo->mfc_ttls[v->v_id] = mfc->mfc_ttl;
break;
}
if (new != 0)
msa->msa_needed += sizeof(*minfo);
if_put(ifp);
return (0);
}
int
mrt_sysctl_mfc(void *oldp, size_t *oldlenp)
{
unsigned int rtableid;
int error;
struct mfcsysctlarg msa;
if (oldp != NULL && *oldlenp > MAXPHYS)
return (EINVAL);
memset(&msa, 0, sizeof(msa));
if (oldp != NULL && *oldlenp > 0) {
msa.msa_minfos = malloc(*oldlenp, M_TEMP, M_WAITOK | M_ZERO);
msa.msa_len = *oldlenp;
}
for (rtableid = 0; rtableid <= RT_TABLEID_MAX; rtableid++) {
rtable_walk(rtableid, AF_INET, NULL, mrt_rtwalk_mfcsysctl,
&msa);
}
if (msa.msa_minfos != NULL && msa.msa_needed > 0 &&
(error = copyout(msa.msa_minfos, oldp, msa.msa_needed)) != 0) {
free(msa.msa_minfos, M_TEMP, msa.msa_len);
return (error);
}
free(msa.msa_minfos, M_TEMP, msa.msa_len);
*oldlenp = msa.msa_needed;
return (0);
}
/*
* Enable multicast routing
*/
int
ip_mrouter_init(struct socket *so, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
unsigned int rtableid = inp->inp_rtableid;
int *v;
if (so->so_type != SOCK_RAW ||
so->so_proto->pr_protocol != IPPROTO_IGMP)
return (EOPNOTSUPP);
if (m == NULL || m->m_len < sizeof(int))
return (EINVAL);
v = mtod(m, int *);
if (*v != 1)
return (EINVAL);
if (ip_mrouter[rtableid] != NULL)
return (EADDRINUSE);
ip_mrouter[rtableid] = so;
return (0);
}
int
mrouter_rtwalk_delete(struct rtentry *rt, void *arg, unsigned int rtableid)
{
/* Skip non-multicast routes. */
if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
(RTF_HOST | RTF_MULTICAST))
return (0);
return EEXIST;
}
/*
* Disable multicast routing
*/
int
ip_mrouter_done(struct socket *so)
{
struct inpcb *inp = sotoinpcb(so);
struct ifnet *ifp;
unsigned int rtableid = inp->inp_rtableid;
int error;
NET_ASSERT_LOCKED();
/* Delete all remaining installed multicast routes. */
do {
struct rtentry *rt = NULL;
error = rtable_walk(rtableid, AF_INET, &rt,
mrouter_rtwalk_delete, NULL);
if (rt != NULL && error == EEXIST) {
mrt_mcast_del(rt, rtableid);
error = EAGAIN;
}
rtfree(rt);
} while (error == EAGAIN);
/* Unregister all interfaces in the domain. */
TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
if (ifp->if_rdomain != rtableid)
continue;
vif_delete(ifp);
}
mrt_api_config = 0;
ip_mrouter[rtableid] = NULL;
mrt_count[rtableid] = 0;
return (0);
}
int
get_version(struct mbuf *m)
{
int *v = mtod(m, int *);
*v = 0x0305; /* XXX !!!! */
m->m_len = sizeof(int);
return (0);
}
/*
* Configure API capabilities
*/
int
set_api_config(struct socket *so, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
struct ifnet *ifp;
u_int32_t *apival;
unsigned int rtableid = inp->inp_rtableid;
if (m == NULL || m->m_len < sizeof(u_int32_t))
return (EINVAL);
apival = mtod(m, u_int32_t *);
/*
* We can set the API capabilities only if it is the first operation
* after MRT_INIT. I.e.:
* - there are no vifs installed
* - the MFC table is empty
*/
TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
if (ifp->if_rdomain != rtableid)
continue;
if (ifp->if_mcast == NULL)
continue;
*apival = 0;
return (EPERM);
}
if (mrt_count[rtableid] > 0) {
*apival = 0;
return (EPERM);
}
mrt_api_config = *apival & mrt_api_support;
*apival = mrt_api_config;
return (0);
}
/*
* Get API capabilities
*/
int
get_api_support(struct mbuf *m)
{
u_int32_t *apival;
if (m == NULL || m->m_len < sizeof(u_int32_t))
return (EINVAL);
apival = mtod(m, u_int32_t *);
*apival = mrt_api_support;
return (0);
}
/*
* Get API configured capabilities
*/
int
get_api_config(struct mbuf *m)
{
u_int32_t *apival;
if (m == NULL || m->m_len < sizeof(u_int32_t))
return (EINVAL);
apival = mtod(m, u_int32_t *);
*apival = mrt_api_config;
return (0);
}
static struct sockaddr_in sin = { sizeof(sin), AF_INET };
int
add_vif(struct socket *so, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
struct vifctl *vifcp;
struct vif *vifp;
struct ifaddr *ifa;
struct ifnet *ifp;
struct ifreq ifr;
int error;
unsigned int rtableid = inp->inp_rtableid;
NET_ASSERT_LOCKED();
if (m == NULL || m->m_len < sizeof(struct vifctl))
return (EINVAL);
vifcp = mtod(m, struct vifctl *);
if (vifcp->vifc_vifi >= MAXVIFS)
return (EINVAL);
if (in_nullhost(vifcp->vifc_lcl_addr))
return (EADDRNOTAVAIL);
if (if_lookupbyvif(vifcp->vifc_vifi, rtableid) != NULL)
return (EADDRINUSE);
/* Tunnels are no longer supported use gif(4) instead. */
if (vifcp->vifc_flags & VIFF_TUNNEL)
return (EOPNOTSUPP);
{
sin.sin_addr = vifcp->vifc_lcl_addr;
ifa = ifa_ifwithaddr(sintosa(&sin), rtableid);
if (ifa == NULL)
return (EADDRNOTAVAIL);
}
/* Use the physical interface associated with the address. */
ifp = ifa->ifa_ifp;
if (ifp->if_mcast != NULL)
return (EADDRINUSE);
{
/* Make sure the interface supports multicast. */
if ((ifp->if_flags & IFF_MULTICAST) == 0)
return (EOPNOTSUPP);
/* Enable promiscuous reception of all IP multicasts. */
memset(&ifr, 0, sizeof(ifr));
satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
satosin(&ifr.ifr_addr)->sin_family = AF_INET;
satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
KERNEL_LOCK();
error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
KERNEL_UNLOCK();
if (error)
return (error);
}
vifp = malloc(sizeof(*vifp), M_MRTABLE, M_WAITOK | M_ZERO);
ifp->if_mcast = (caddr_t)vifp;
vifp->v_id = vifcp->vifc_vifi;
vifp->v_flags = vifcp->vifc_flags;
vifp->v_threshold = vifcp->vifc_threshold;
vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
return (0);
}
int
del_vif(struct socket *so, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
struct ifnet *ifp;
vifi_t *vifip;
unsigned int rtableid = inp->inp_rtableid;
NET_ASSERT_LOCKED();
if (m == NULL || m->m_len < sizeof(vifi_t))
return (EINVAL);
vifip = mtod(m, vifi_t *);
if ((ifp = if_lookupbyvif(*vifip, rtableid)) == NULL)
return (EADDRNOTAVAIL);
vif_delete(ifp);
return (0);
}
void
vif_delete(struct ifnet *ifp)
{
struct vif *v;
struct ifreq ifr;
if ((v = (struct vif *)ifp->if_mcast) == NULL)
return;
ifp->if_mcast = NULL;
memset(&ifr, 0, sizeof(ifr));
satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
satosin(&ifr.ifr_addr)->sin_family = AF_INET;
satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
KERNEL_LOCK();
(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
KERNEL_UNLOCK();
free(v, M_MRTABLE, sizeof(*v));
}
void
mfc_expire_route(struct rtentry *rt, u_int rtableid)
{
struct mfc *mfc = (struct mfc *)rt->rt_llinfo;
/* Skip entry being deleted. */
if (mfc == NULL)
return;
DPRINTF("Route domain %d origin %#08X group %#08x interface %d "
"expire %s", rtableid, satosin(rt->rt_gateway)->sin_addr.s_addr,
satosin(rt_key(rt))->sin_addr.s_addr,
rt->rt_ifidx, mfc->mfc_expire ? "yes" : "no");
/* Not expired, add it back to the queue. */
if (mfc->mfc_expire == 0) {
mfc->mfc_expire = 1;
rt_timer_add(rt, &ip_mrouterq, rtableid);
return;
}
mrt_mcast_del(rt, rtableid);
}
int
mfc_add_route(struct ifnet *ifp, struct sockaddr *origin,
struct sockaddr *group, struct mfcctl2 *mfccp, int wait)
{
struct vif *v = (struct vif *)ifp->if_mcast;
struct rtentry *rt;
struct mfc *mfc;
unsigned int rtableid = ifp->if_rdomain;
rt = rt_mcast_add(ifp, origin, group);
if (rt == NULL)
return (EHOSTUNREACH);
mfc = malloc(sizeof(*mfc), M_MRTABLE, wait | M_ZERO);
if (mfc == NULL) {
DPRINTF("origin %#08X group %#08X parent %d (%s) "
"malloc failed",
satosin(origin)->sin_addr.s_addr,
satosin(group)->sin_addr.s_addr,
mfccp->mfcc_parent, ifp->if_xname);
mrt_mcast_del(rt, rtableid);
rtfree(rt);
return (ENOMEM);
}
rt->rt_llinfo = (caddr_t)mfc;
rt_timer_add(rt, &ip_mrouterq, rtableid);
mfc->mfc_parent = mfccp->mfcc_parent;
mfc->mfc_pkt_cnt = 0;
mfc->mfc_byte_cnt = 0;
mfc->mfc_wrong_if = 0;
mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
MRT_MFC_FLAGS_ALL;
mfc->mfc_expire = 0;
/* set the RP address */
if (mrt_api_config & MRT_MFC_RP)
mfc->mfc_rp = mfccp->mfcc_rp;
else
mfc->mfc_rp = zeroin_addr;
rtfree(rt);
return (0);
}
void
update_mfc_params(struct mfcctl2 *mfccp, int wait, unsigned int rtableid)
{
struct rtentry *rt;
struct mfc *mfc;
struct ifnet *ifp;
int i;
struct sockaddr_in osin, msin;
memset(&osin, 0, sizeof(osin));
osin.sin_len = sizeof(osin);
osin.sin_family = AF_INET;
osin.sin_addr = mfccp->mfcc_origin;
memset(&msin, 0, sizeof(msin));
msin.sin_len = sizeof(msin);
msin.sin_family = AF_INET;
msin.sin_addr = mfccp->mfcc_mcastgrp;
for (i = 0; i < MAXVIFS; i++) {
/* Don't add/del upstream routes here. */
if (i == mfccp->mfcc_parent)
continue;
/* Test for vif existence and then update the entry. */
if ((ifp = if_lookupbyvif(i, rtableid)) == NULL)
continue;
rt = mfc_find(ifp, &mfccp->mfcc_origin,
&mfccp->mfcc_mcastgrp, rtableid);
/* vif not configured or removed. */
if (mfccp->mfcc_ttls[i] == 0) {
/* Route doesn't exist, nothing to do. */
if (rt == NULL)
continue;
DPRINTF("del route (group %#08X) for vif %d (%s)",
mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
mrt_mcast_del(rt, rtableid);
rtfree(rt);
continue;
}
/* Route exists, look for changes. */
if (rt != NULL) {
mfc = (struct mfc *)rt->rt_llinfo;
/* Skip route being deleted. */
if (mfc == NULL) {
rtfree(rt);
continue;
}
/* No new changes to apply. */
if (mfccp->mfcc_ttls[i] == mfc->mfc_ttl &&
mfccp->mfcc_parent == mfc->mfc_parent) {
rtfree(rt);
continue;
}
DPRINTF("update route (group %#08X) for vif %d (%s)",
mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
mfc->mfc_ttl = mfccp->mfcc_ttls[i];
mfc->mfc_parent = mfccp->mfcc_parent;
rtfree(rt);
continue;
}
DPRINTF("add route (group %#08X) for vif %d (%s)",
mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
mfc_add_route(ifp, sintosa(&osin), sintosa(&msin),
mfccp, wait);
}
/* Create route for the parent interface. */
if ((ifp = if_lookupbyvif(mfccp->mfcc_parent, rtableid)) == NULL) {
DPRINTF("failed to find upstream interface %d",
mfccp->mfcc_parent);
return;
}
/* We already have a route, nothing to do here. */
if ((rt = mfc_find(ifp, &mfccp->mfcc_origin,
&mfccp->mfcc_mcastgrp, rtableid)) != NULL) {
rtfree(rt);
return;
}
DPRINTF("add upstream route (group %#08X) for if %s",
mfccp->mfcc_mcastgrp.s_addr, ifp->if_xname);
mfc_add_route(ifp, sintosa(&osin), sintosa(&msin), mfccp, wait);
}
int
mfc_add(struct mfcctl2 *mfcctl2, struct in_addr *origin,
struct in_addr *group, int vidx, unsigned int rtableid, int wait)
{
struct ifnet *ifp;
struct vif *v;
struct mfcctl2 mfcctl;
ifp = if_lookupbyvif(vidx, rtableid);
if (ifp == NULL ||
(v = (struct vif *)ifp->if_mcast) == NULL)
return (EHOSTUNREACH);
memset(&mfcctl, 0, sizeof(mfcctl));
if (mfcctl2 == NULL) {
mfcctl.mfcc_origin = *origin;
mfcctl.mfcc_mcastgrp = *group;
mfcctl.mfcc_parent = vidx;
} else
memcpy(&mfcctl, mfcctl2, sizeof(mfcctl));
update_mfc_params(&mfcctl, wait, rtableid);
return (0);
}
int
add_mfc(struct socket *so, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
struct mfcctl2 mfcctl2;
int mfcctl_size = sizeof(struct mfcctl);
unsigned int rtableid = inp->inp_rtableid;
NET_ASSERT_LOCKED();
if (mrt_api_config & MRT_API_FLAGS_ALL)
mfcctl_size = sizeof(struct mfcctl2);
if (m == NULL || m->m_len < mfcctl_size)
return (EINVAL);
/*
* select data size depending on API version.
*/
if (mrt_api_config & MRT_API_FLAGS_ALL) {
struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *);
memcpy((caddr_t)&mfcctl2, mp2, sizeof(*mp2));
} else {
struct mfcctl *mp = mtod(m, struct mfcctl *);
memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0,
sizeof(mfcctl2) - sizeof(struct mfcctl));
}
if (mfc_add(&mfcctl2, &mfcctl2.mfcc_origin, &mfcctl2.mfcc_mcastgrp,
mfcctl2.mfcc_parent, rtableid, M_WAITOK) == -1)
return (EINVAL);
return (0);
}
int
del_mfc(struct socket *so, struct mbuf *m)
{
struct inpcb *inp = sotoinpcb(so);
struct rtentry *rt;
struct mfcctl2 mfcctl2;
int mfcctl_size = sizeof(struct mfcctl);
struct mfcctl *mp;
unsigned int rtableid = inp->inp_rtableid;
NET_ASSERT_LOCKED();
/*
* XXX: for deleting MFC entries the information in entries
* of size "struct mfcctl" is sufficient.
*/
if (m == NULL || m->m_len < mfcctl_size)
return (EINVAL);
mp = mtod(m, struct mfcctl *);
memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0,
sizeof(mfcctl2) - sizeof(struct mfcctl));
DPRINTF("origin %#08X group %#08X rtableid %d",
mfcctl2.mfcc_origin.s_addr, mfcctl2.mfcc_mcastgrp.s_addr, rtableid);
while ((rt = mfc_find(NULL, &mfcctl2.mfcc_origin,
&mfcctl2.mfcc_mcastgrp, rtableid)) != NULL) {
mrt_mcast_del(rt, rtableid);
rtfree(rt);
}
return (0);
}
int
socket_send(struct socket *so, struct mbuf *mm, struct sockaddr_in *src)
{
if (so != NULL) {
int ret;
mtx_enter(&so->so_rcv.sb_mtx);
ret = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL);
mtx_leave(&so->so_rcv.sb_mtx);
if (ret != 0) {
sorwakeup(so);
return (0);
}
}
m_freem(mm);
return (-1);
}
/*
* IP multicast forwarding function. This function assumes that the packet
* pointed to by "ip" has arrived on (or is about to be sent to) the interface
* pointed to by "ifp", and the packet is to be relayed to other networks
* that have members of the packet's destination IP multicast group.
*
* The packet is returned unscathed to the caller, unless it is
* erroneous, in which case a non-zero return value tells the caller to
* discard it.
*/
#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
int
ip_mforward(struct mbuf *m, struct ifnet *ifp)
{
struct ip *ip = mtod(m, struct ip *);
struct vif *v;
struct rtentry *rt;
static int srctun = 0;
struct mbuf *mm;
unsigned int rtableid = ifp->if_rdomain;
if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
((u_char *)(ip + 1))[1] != IPOPT_LSRR) {
/*
* Packet arrived via a physical interface or
* an encapsulated tunnel or a register_vif.
*/
} else {
/*
* Packet arrived through a source-route tunnel.
* Source-route tunnels are no longer supported.
*/
if ((srctun++ % 1000) == 0)
log(LOG_ERR, "ip_mforward: received source-routed "
"packet from %x\n", ntohl(ip->ip_src.s_addr));
return (EOPNOTSUPP);
}
/*
* Don't forward a packet with time-to-live of zero or one,
* or a packet destined to a local-only group.
*/
if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ip->ip_dst.s_addr))
return (0);
/*
* Determine forwarding vifs from the forwarding cache table
*/
++mrtstat.mrts_mfc_lookups;
rt = mfc_find(NULL, &ip->ip_src, &ip->ip_dst, rtableid);
/* Entry exists, so forward if necessary */
if (rt != NULL) {
return (ip_mdq(m, ifp, rt));
} else {
/*
* If we don't have a route for packet's origin,
* Make a copy of the packet & send message to routing daemon
*/
int hlen = ip->ip_hl << 2;
++mrtstat.mrts_mfc_misses;
mrtstat.mrts_no_route++;
{
struct igmpmsg *im;
/*
* Locate the vifi for the incoming interface for
* this packet.
* If none found, drop packet.
*/
if ((v = (struct vif *)ifp->if_mcast) == NULL)
return (EHOSTUNREACH);
/*
* Make a copy of the header to send to the user level
* process
*/
mm = m_copym(m, 0, hlen, M_NOWAIT);
if (mm == NULL ||
(mm = m_pullup(mm, hlen)) == NULL)
return (ENOBUFS);
/*
* Send message to routing daemon to install
* a route into the kernel table
*/
im = mtod(mm, struct igmpmsg *);
im->im_msgtype = IGMPMSG_NOCACHE;
im->im_mbz = 0;
im->im_vif = v->v_id;
mrtstat.mrts_upcalls++;
sin.sin_addr = ip->ip_src;
if (socket_send(ip_mrouter[rtableid], mm, &sin) < 0) {
log(LOG_WARNING, "ip_mforward: ip_mrouter "
"socket queue full\n");
++mrtstat.mrts_upq_sockfull;
return (ENOBUFS);
}
mfc_add(NULL, &ip->ip_src, &ip->ip_dst, v->v_id,
rtableid, M_NOWAIT);
}
return (0);
}
}
/*
* Packet forwarding routine once entry in the cache is made
*/
int
ip_mdq(struct mbuf *m, struct ifnet *ifp0, struct rtentry *rt)
{
struct ip *ip = mtod(m, struct ip *);
struct mfc *mfc = (struct mfc *)rt->rt_llinfo;
struct vif *v = (struct vif *)ifp0->if_mcast;
struct ifnet *ifp;
struct mbuf *mc;
struct ip_moptions imo;
/* Sanity check: we have all promised pointers. */
if (v == NULL || mfc == NULL) {
rtfree(rt);
return (EHOSTUNREACH);
}
/*
* Don't forward if it didn't arrive from the parent vif for its origin.
*/
if (mfc->mfc_parent != v->v_id) {
/* came in the wrong interface */
++mrtstat.mrts_wrong_if;
mfc->mfc_wrong_if++;
rtfree(rt);
return (0);
}
/* If I sourced this packet, it counts as output, else it was input. */
if (in_hosteq(ip->ip_src, v->v_lcl_addr)) {
v->v_pkt_out++;
v->v_bytes_out += m->m_pkthdr.len;
} else {
v->v_pkt_in++;
v->v_bytes_in += m->m_pkthdr.len;
}
/*
* For each vif, decide if a copy of the packet should be forwarded.
* Forward if:
* - the ttl exceeds the vif's threshold
* - there are group members downstream on interface
*/
do {
/* Don't consider non multicast routes. */
if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
(RTF_HOST | RTF_MULTICAST))
continue;
mfc = (struct mfc *)rt->rt_llinfo;
if (mfc == NULL)
continue;
mfc->mfc_pkt_cnt++;
mfc->mfc_byte_cnt += m->m_pkthdr.len;
/* Don't let this route expire. */
mfc->mfc_expire = 0;
if (ip->ip_ttl <= mfc->mfc_ttl)
continue;
if ((ifp = if_get(rt->rt_ifidx)) == NULL)
continue;
/* Sanity check: did we configure this? */
if ((v = (struct vif *)ifp->if_mcast) == NULL) {
if_put(ifp);
continue;
}
/* Don't send in the upstream interface. */
if (mfc->mfc_parent == v->v_id) {
if_put(ifp);
continue;
}
v->v_pkt_out++;
v->v_bytes_out += m->m_pkthdr.len;
/*
* Make a new reference to the packet; make sure
* that the IP header is actually copied, not
* just referenced, so that ip_output() only
* scribbles on the copy.
*/
mc = m_dup_pkt(m, max_linkhdr, M_NOWAIT);
if (mc == NULL) {
if_put(ifp);
rtfree(rt);
return (ENOBUFS);
}
/*
* if physical interface option, extract the options
* and then send
*/
imo.imo_ifidx = rt->rt_ifidx;
imo.imo_ttl = ip->ip_ttl - IPTTLDEC;
imo.imo_loop = 1;
ip_output(mc, NULL, NULL, IP_FORWARDING, &imo, NULL, 0);
if_put(ifp);
} while ((rt = rtable_iterate(rt)) != NULL);
return (0);
}
struct ifnet *
if_lookupbyvif(vifi_t vifi, unsigned int rtableid)
{
struct vif *v;
struct ifnet *ifp;
TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
if (ifp->if_rdomain != rtableid)
continue;
if ((v = (struct vif *)ifp->if_mcast) == NULL)
continue;
if (v->v_id != vifi)
continue;
return (ifp);
}
return (NULL);
}
struct rtentry *
rt_mcast_add(struct ifnet *ifp, struct sockaddr *origin, struct sockaddr *group)
{
struct ifaddr *ifa;
int rv;
unsigned int rtableid = ifp->if_rdomain;
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family == AF_INET)
break;
}
if (ifa == NULL) {
DPRINTF("ifa == NULL");
return (NULL);
}
rv = rt_ifa_add(ifa, RTF_HOST | RTF_MULTICAST | RTF_MPATH,
group, ifp->if_rdomain);
if (rv != 0) {
DPRINTF("rt_ifa_add failed (%d)", rv);
return (NULL);
}
mrt_count[rtableid]++;
return (mfc_find(ifp, NULL, &satosin(group)->sin_addr, rtableid));
}
void
mrt_mcast_del(struct rtentry *rt, unsigned int rtableid)
{
struct ifnet *ifp;
int error;
/* Remove all timers related to this route. */
rt_timer_remove_all(rt);
free(rt->rt_llinfo, M_MRTABLE, sizeof(struct mfc));
rt->rt_llinfo = NULL;
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
error = rtdeletemsg(rt, ifp, rtableid);
if_put(ifp);
if (error)
DPRINTF("delete route error %d\n", error);
mrt_count[rtableid]--;
}