From f0068c4a701b5465bf29ebd9913a457d3bc5f2a1 Mon Sep 17 00:00:00 2001
From: Garrett Wollman
Date: Tue, 6 Sep 1994 22:42:31 +0000
Subject: [PATCH] Initial get-the-easy-case-working upgrade of the multicast
 code to something more recent than the ancient 1.2 release contained in 4.4.
 This code has the following advantages as compared to previous versions
 (culled from the README file for the SunOS release):

- True multicast delivery
- Configurable rate-limiting of forwarded multicast traffic on each
  physical interface or tunnel, using a token-bucket limiter.
- Simplistic classification of packets for prioritized dropping.
- Administrative scoping of multicast address ranges.
- Faster detection of hosts leaving groups.
- Support for multicast traceroute (code not yet available).
- Support for RSVP, the Resource Reservation Protocol.

What still needs to be done:

- The multicast forwarder needs testing.
- The multicast routing daemon needs to be ported.
- Network interface drivers need to have the `#ifdef MULTICAST' goop
  ripped out of them.
- The IGMP code should probably be bogon-tested.

Some notes about the porting process:

In some cases, the Berkeley people decided to incorporate functionality
from later releases of the multicast code, but then had to do things
differently. As a result, if you look at Deering's patches, and then
look at our code, it is not always obvious whether the patch even
applies. Let the reader beware.

I ran ip_mroute.c through several passes of `unifdef' to get rid of
useless grot, and to permanently enable the RSVP support, which we will
include as standard.

Ported by: Garrett Wollman Submitted by: Steve Deering and Ajit Thyagarajan (among others) --- sys/net/route.c | 7 +- sys/netinet/igmp.c | 465 ++++++-- sys/netinet/igmp.h | 34 +- sys/netinet/igmp_var.h | 25 +- sys/netinet/in.h | 7 +- sys/netinet/in_proto.c | 224 ++-- sys/netinet/in_var.h | 16 +- sys/netinet/ip_input.c | 40 +- sys/netinet/ip_mroute.c | 2272 +++++++++++++++++++++++++++------------ sys/netinet/ip_mroute.h | 216 ++-- sys/netinet/ip_output.c | 39 +- sys/netinet/ip_var.h | 8 +- sys/netinet/raw_ip.c | 19 +- sys/sys/sockio.h | 7 +- 14 files changed, 2423 insertions(+), 956 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index 9673a7abb2e1..64f8c7a7ff20 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)route.c 8.2 (Berkeley) 11/15/93 - * $Id$ + * $Id: route.c,v 1.3 1994/08/02 07:46:40 davidg Exp $ */ #include @@ -273,7 +273,12 @@ rtioctl(req, data, p) caddr_t data; struct proc *p; { +#ifdef MULTICAST + /* Multicast goop, grrr... */ + return mrt_ioctl(cmd, data, p); +#else return (EOPNOTSUPP); +#endif } struct ifaddr * diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index e39d2d9945d1..ca1db1ea69d2 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -35,11 +35,17 @@ * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 - * $Id$ + * $Id: igmp.c,v 1.3 1994/08/02 07:48:04 davidg Exp $ */ -/* Internet Group Management Protocol (IGMP) routines. */ - +/* + * Internet Group Management Protocol (IGMP) routines. + * + * Written by Steve Deering, Stanford, May 1988. + * Modified by Rosen Sharma, Stanford, Aug 1994. 
+ * + * MULTICAST 1.4 + */ #include #include @@ -60,10 +66,14 @@ extern struct ifnet loif; +struct igmpstat igmpstat; + static int igmp_timers_are_running = 0; static u_long igmp_all_hosts_group; +static struct router_info *Head = 0; -static void igmp_sendreport __P((struct in_multi *)); +static void igmp_sendpkt(struct in_multi *, int); +static void igmp_sendleave(struct in_multi *); void igmp_init() @@ -72,6 +82,73 @@ igmp_init() * To avoid byte-swapping the same value over and over again. */ igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP); + Head = (struct router_info *) 0; +} + +int +fill_rti(inm) + struct in_multi *inm; +{ + register struct router_info *rti = Head; + +#ifdef IGMP_DEBUG + printf("[igmp.c, _fill_rti] --> entering \n"); +#endif + while (rti) { + if (rti->ifp == inm->inm_ifp){ /* ? is it ok to compare */ + /* pointers */ + inm->inm_rti = rti; +#ifdef IGMP_DEBUG + printf("[igmp.c, _fill_rti] --> found old entry \n"); +#endif + if (rti->type == IGMP_OLD_ROUTER) + return IGMP_HOST_MEMBERSHIP_REPORT; + else + return IGMP_HOST_NEW_MEMBERSHIP_REPORT; + } + rti = rti->next; + } + MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT); + rti->ifp = inm->inm_ifp; + rti->type = IGMP_NEW_ROUTER; + rti->time = IGMP_AGE_THRESHOLD; + rti->next = Head; + Head = rti; + inm->inm_rti = rti; +#ifdef IGMP_DEBUG + printf("[igmp.c, _fill_rti] --> created new entry \n"); +#endif + return IGMP_HOST_NEW_MEMBERSHIP_REPORT; +} + +struct router_info * +find_rti(ifp) + struct ifnet *ifp; +{ + register struct router_info *rti = Head; + +#ifdef IGMP_DEBUG + printf("[igmp.c, _find_rti] --> entering \n"); +#endif + while (rti) { + if (rti->ifp == ifp){ /* ? 
is it ok to compare pointers */ +#ifdef IGMP_DEBUG + printf("[igmp.c, _find_rti] --> found old entry \n"); +#endif + return rti; + } + rti = rti->next; + } + MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT); + rti->ifp = ifp; + rti->type = IGMP_NEW_ROUTER; + rti->time = IGMP_AGE_THRESHOLD; + rti->next = Head; + Head = rti; +#ifdef IGMP_DEBUG + printf("[igmp.c, _find_rti] --> created an entry \n"); +#endif + return rti; } void @@ -87,6 +164,9 @@ igmp_input(m, iphlen) register struct in_multi *inm; register struct in_ifaddr *ia; struct in_multistep step; + struct router_info *rti; + + static int timer; /** timer value in the igmp query header **/ ++igmpstat.igps_rcv_total; @@ -121,7 +201,10 @@ igmp_input(m, iphlen) } m->m_data -= iphlen; m->m_len += iphlen; + ip = mtod(m, struct ip *); + timer = ntohs(igmp->igmp_code); + rti = find_rti(ifp); switch (igmp->igmp_type) { @@ -131,29 +214,127 @@ igmp_input(m, iphlen) if (ifp == &loif) break; - if (ip->ip_dst.s_addr != igmp_all_hosts_group) { - ++igmpstat.igps_rcv_badqueries; - m_freem(m); - return; - } + if (igmp->igmp_code == 0) { + if (ip->ip_dst.s_addr != igmp_all_hosts_group) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; + } - /* - * Start the timers in all of our membership records for - * the interface on which the query arrived, except those - * that are already running and those that belong to the - * "all-hosts" group. - */ - IN_FIRST_MULTI(step, inm); - while (inm != NULL) { - if (inm->inm_ifp == ifp && inm->inm_timer == 0 && - inm->inm_addr.s_addr != igmp_all_hosts_group) { - inm->inm_timer = - IGMP_RANDOM_DELAY(inm->inm_addr); - igmp_timers_are_running = 1; + /* + * Start the timers in all of our membership records for + * the interface on which the query arrived, except those + * that are already running and those that belong to the + * "all-hosts" group. 
+ */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_ifp == ifp + && inm->inm_timer == 0 + && inm->inm_addr.s_addr + != igmp_all_hosts_group) { + + inm->inm_state = IGMP_DELAYING_MEMBER; + inm->inm_timer = IGMP_RANDOM_DELAY( + IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ ); + + igmp_timers_are_running = 1; + } + IN_NEXT_MULTI(step, inm); + } + } else { + /* + ** New Router + */ + + if (ip->ip_dst.s_addr != igmp_all_hosts_group) { + if (!(m->m_flags & M_MCAST)) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; + } + } + if (ip->ip_dst.s_addr == igmp_all_hosts_group) { + + /* + * - Start the timers in all of our membership records + * for the interface on which the query arrived + * excl. those that belong to the "all-hosts" group. + * - For timers already running check if they need to + * be reset. + * - Use the igmp->igmp_code filed as the maximum + * delay possible + */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL){ + switch(inm->inm_state){ + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + if (inm->inm_ifp == ifp && + inm->inm_addr.s_addr != + igmp_all_hosts_group) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_DELAYING_MEMBER: + if (inm->inm_ifp == ifp && + (inm->inm_timer > + timer * PR_FASTHZ / IGMP_TIMER_SCALE) + && + inm->inm_addr.s_addr != + igmp_all_hosts_group) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_AWAKENING_MEMBER; + break; + } + IN_NEXT_MULTI(step, inm); + } + } else { + /* + ** group specific query + */ + + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_addr.s_addr == ip->ip_dst.s_addr) { + switch(inm->inm_state ){ + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + inm->inm_state = IGMP_DELAYING_MEMBER; 
+ if (inm->inm_ifp == ifp ) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_DELAYING_MEMBER: + inm->inm_state = IGMP_DELAYING_MEMBER; + if (inm->inm_ifp == ifp && + (inm->inm_timer > + timer * PR_FASTHZ / IGMP_TIMER_SCALE) ) { + inm->inm_timer = IGMP_RANDOM_DELAY(timer); + igmp_timers_are_running = 1; + inm->inm_state = IGMP_DELAYING_MEMBER; + } + break; + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_AWAKENING_MEMBER; + break; + } } IN_NEXT_MULTI(step, inm); } - + } + } break; case IGMP_HOST_MEMBERSHIP_REPORT: @@ -193,7 +374,80 @@ igmp_input(m, iphlen) ++igmpstat.igps_rcv_ourreports; } + if (inm != NULL) { + inm->inm_timer = 0; + ++igmpstat.igps_rcv_ourreports; + + switch(inm->inm_state){ + case IGMP_IDLE_MEMBER: + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + case IGMP_SLEEPING_MEMBER: + inm->inm_state = IGMP_SLEEPING_MEMBER; + break; + case IGMP_DELAYING_MEMBER: + /** check this out - this was if (oldrouter) **/ + if (inm->inm_rti->type == IGMP_OLD_ROUTER) + inm->inm_state = IGMP_LAZY_MEMBER; + else inm->inm_state = IGMP_SLEEPING_MEMBER; + break; + } + } + break; + + case IGMP_HOST_NEW_MEMBERSHIP_REPORT: + /* + * an new report + */ + ++igmpstat.igps_rcv_reports; + + if (ifp == &loif) + break; + + if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || + igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { + ++igmpstat.igps_rcv_badreports; + m_freem(m); + return; + } + + /* + * KLUDGE: if the IP source address of the report has an + * unspecified (i.e., zero) subnet number, as is allowed for + * a booting host, replace it with the correct subnet number + * so that a process-level multicast routing demon can + * determine which subnet it arrived from. This is necessary + * to compensate for the lack of any way for a process to + * determine the arrival interface of an incoming packet. 
+ */ + if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) { + IFP_TO_IA(ifp, ia); + if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet); + } + + /* + * If we belong to the group being reported, stop + * our timer for that group. + */ + IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); + if (inm != NULL) { + inm->inm_timer = 0; + ++igmpstat.igps_rcv_ourreports; + + switch(inm->inm_state){ + case IGMP_DELAYING_MEMBER: + case IGMP_IDLE_MEMBER: + inm->inm_state = IGMP_LAZY_MEMBER; + break; + case IGMP_AWAKENING_MEMBER: + inm->inm_state = IGMP_LAZY_MEMBER; + break; + case IGMP_LAZY_MEMBER: + case IGMP_SLEEPING_MEMBER: + break; + } + } } /* @@ -209,12 +463,16 @@ igmp_joingroup(inm) { register int s = splnet(); + inm->inm_state = IGMP_IDLE_MEMBER; + if (inm->inm_addr.s_addr == igmp_all_hosts_group || inm->inm_ifp == &loif) inm->inm_timer = 0; else { - igmp_sendreport(inm); - inm->inm_timer = IGMP_RANDOM_DELAY(inm->inm_addr); + igmp_sendpkt(inm,fill_rti(inm)); + inm->inm_timer = IGMP_RANDOM_DELAY( + IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ); + inm->inm_state = IGMP_DELAYING_MEMBER; igmp_timers_are_running = 1; } splx(s); @@ -227,6 +485,19 @@ igmp_leavegroup(inm) /* * No action required on leaving a group. 
*/ + switch(inm->inm_state){ + case IGMP_DELAYING_MEMBER: + case IGMP_IDLE_MEMBER: + if (!(inm->inm_addr.s_addr == igmp_all_hosts_group || + inm->inm_ifp == &loif)) + if (inm->inm_rti->type != IGMP_OLD_ROUTER) + igmp_sendleave(inm); + break; + case IGMP_LAZY_MEMBER: + case IGMP_AWAKENING_MEMBER: + case IGMP_SLEEPING_MEMBER: + break; + } } void @@ -250,7 +521,13 @@ igmp_fasttimo() if (inm->inm_timer == 0) { /* do nothing */ } else if (--inm->inm_timer == 0) { - igmp_sendreport(inm); + if (inm->inm_state == IGMP_DELAYING_MEMBER) { + if (inm->inm_rti->type == IGMP_OLD_ROUTER) + igmp_sendpkt(inm, IGMP_HOST_MEMBERSHIP_REPORT); + else + igmp_sendpkt(inm, IGMP_HOST_NEW_MEMBERSHIP_REPORT); + inm->inm_state = IGMP_IDLE_MEMBER; + } } else { igmp_timers_are_running = 1; } @@ -259,57 +536,93 @@ igmp_fasttimo() splx(s); } -static void -igmp_sendreport(inm) - register struct in_multi *inm; +void +igmp_slowtimo() { - register struct mbuf *m; - register struct igmp *igmp; - register struct ip *ip; - register struct ip_moptions *imo; - struct ip_moptions simo; + int s = splnet(); + register struct router_info *rti = Head; - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (m == NULL) - return; - /* - * Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN - * is smaller than mbuf size returned by MGETHDR. 
- */ - m->m_data += max_linkhdr; - m->m_len = sizeof(struct ip) + IGMP_MINLEN; - m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN; - - ip = mtod(m, struct ip *); - ip->ip_tos = 0; - ip->ip_len = sizeof(struct ip) + IGMP_MINLEN; - ip->ip_off = 0; - ip->ip_p = IPPROTO_IGMP; - ip->ip_src.s_addr = INADDR_ANY; - ip->ip_dst = inm->inm_addr; - - igmp = (struct igmp *)(ip + 1); - igmp->igmp_type = IGMP_HOST_MEMBERSHIP_REPORT; - igmp->igmp_code = 0; - igmp->igmp_group = inm->inm_addr; - igmp->igmp_cksum = 0; - igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN); - - imo = &simo; - bzero((caddr_t)imo, sizeof(*imo)); - imo->imo_multicast_ifp = inm->inm_ifp; - imo->imo_multicast_ttl = 1; - /* - * Request loopback of the report if we are acting as a multicast - * router, so that the process-level routing demon can hear it. - */ -#ifdef MROUTING - { - extern struct socket *ip_mrouter; - imo->imo_multicast_loop = (ip_mrouter != NULL); - } +#ifdef IGMP_DEBUG + printf("[igmp.c,_slowtimo] -- > entering \n"); #endif - ip_output(m, NULL, NULL, 0, imo); - - ++igmpstat.igps_snd_reports; + while (rti) { + rti->time ++; + if (rti->time >= IGMP_AGE_THRESHOLD){ + rti->type = IGMP_NEW_ROUTER; + rti->time = IGMP_AGE_THRESHOLD; + } + rti = rti->next; + } +#ifdef IGMP_DEBUG + printf("[igmp.c,_slowtimo] -- > exiting \n"); +#endif + splx(s); +} + +static void +igmp_sendpkt(inm, type) + struct in_multi *inm; + int type; +{ + struct mbuf *m; + struct igmp *igmp; + struct ip *ip; + struct ip_moptions *imo; + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) + return; + + MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_DONTWAIT); + if (!imo) { + m_free(m); + return; + } + + m->m_pkthdr.rcvif = &loif; + m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN; + MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip)); + m->m_data += sizeof(struct ip); + m->m_len = IGMP_MINLEN; + igmp = mtod(m, struct igmp *); + igmp->igmp_type = type; + igmp->igmp_code = 0; + igmp->igmp_group = inm->inm_addr; + 
igmp->igmp_cksum = 0; + igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN); + + m->m_data -= sizeof(struct ip); + m->m_len += sizeof(struct ip); + ip = mtod(m, struct ip *); + ip->ip_tos = 0; + ip->ip_len = sizeof(struct ip) + IGMP_MINLEN; + ip->ip_off = 0; + ip->ip_p = IPPROTO_IGMP; + ip->ip_src.s_addr = INADDR_ANY; + ip->ip_dst = igmp->igmp_group; + + imo->imo_multicast_ifp = inm->inm_ifp; + imo->imo_multicast_ttl = 1; + /* + * Request loopback of the report if we are acting as a multicast + * router, so that the process-level routing demon can hear it. + */ +#ifdef MROUTING + imo->imo_multicast_loop = (ip_mrouter != NULL); +#else + imo->imo_multicast_loop = 0; +#endif + + ip_output(m, (struct mbuf *)0, (struct route *)0, 0, imo); + + FREE(imo, M_IPMOPTS); + ++igmpstat.igps_snd_reports; + +} + +static void +igmp_sendleave(inm) + struct in_multi *inm; +{ + igmp_sendpkt(inm, IGMP_HOST_LEAVE_MESSAGE); } diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h index 52c672bdd3f8..1e082c4e570d 100644 --- a/sys/netinet/igmp.h +++ b/sys/netinet/igmp.h @@ -35,13 +35,19 @@ * SUCH DAMAGE. * * @(#)igmp.h 8.1 (Berkeley) 6/10/93 - * $Id: igmp.h,v 1.2 1994/08/02 07:48:07 davidg Exp $ + * $Id: igmp.h,v 1.3 1994/08/21 05:27:25 paul Exp $ */ #ifndef _NETINET_IGMP_H_ #define _NETINET_IGMP_H_ -/* Internet Group Management Protocol (IGMP) definitions. */ +/* + * Internet Group Management Protocol (IGMP) definitions. + * + * Written by Steve Deering, Stanford, May 1988. + * + * MULTICAST 1.2 + */ /* * IGMP packet format. @@ -59,7 +65,29 @@ struct igmp { #define IGMP_HOST_MEMBERSHIP_REPORT 0x12 #define IGMP_DVMRP 0x13 /* for experimental multicast */ /* routing protocol */ +#define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 +#define IGMP_HOST_LEAVE_MESSAGE 0x17 +#define IGMP_MTRACE 0x1f /* mcast traceroute messages */ +#define IGMP_MTRACE_RESP 0x1e /* traceroute resp. 
(to sender) */ #define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ +#define IGMP_TIMER_SCALE 10 /* denotes that the igmp->timer filed */ + /*specifies time in 10th os seconds */ -#endif +#define IGMP_DELAYING_MEMBER 1 +#define IGMP_IDLE_MEMBER 2 +#define IGMP_LAZY_MEMBER 3 +#define IGMP_SLEEPING_MEMBER 4 +#define IGMP_AWAKENING_MEMBER 5 + + +#define IGMP_OLD_ROUTER 0 +#define IGMP_NEW_ROUTER 1 + +#define IGMP_AGE_THRESHOLD 540 + +#ifdef IGMP_STATES +static char *tostate[]={"","DELAYING_MEMBER","IDLE","LAZY","SLEEPING", + "AWAKENING" }; +#endif /* IGMP_STATES */ +#endif /* _NETINET_IGMP_H_ */ diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h index 0018cd22f3ea..403a9115b4c1 100644 --- a/sys/netinet/igmp_var.h +++ b/sys/netinet/igmp_var.h @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * @(#)igmp_var.h 8.1 (Berkeley) 7/19/93 - * $Id: igmp_var.h,v 1.2 1994/08/02 07:48:09 davidg Exp $ + * $Id: igmp_var.h,v 1.3 1994/08/21 05:27:26 paul Exp $ */ #ifndef _NETINET_IGMP_VAR_H_ @@ -63,29 +63,16 @@ struct igmpstat { }; #ifdef KERNEL -struct igmpstat igmpstat; +extern struct igmpstat igmpstat; -/* - * Macro to compute a random timer value between 1 and (IGMP_MAX_REPORTING_ - * DELAY * countdown frequency). We generate a "random" number by adding - * the total number of IP packets received, our primary IP address, and the - * multicast address being timed-out. The 4.3 random() routine really - * ought to be available in the kernel! 
- */ -#define IGMP_RANDOM_DELAY(multiaddr) \ - /* struct in_addr multiaddr; */ \ - ( (ipstat.ips_total + \ - ntohl(IA_SIN(in_ifaddr)->sin_addr.s_addr) + \ - ntohl((multiaddr).s_addr) \ - ) \ - % (IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ) + 1 \ - ) +#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1) -void igmp_init __P(()); +void igmp_init __P((void)); void igmp_input __P((struct mbuf *, int)); void igmp_joingroup __P((struct in_multi *)); void igmp_leavegroup __P((struct in_multi *)); -void igmp_fasttimo __P(()); +void igmp_fasttimo __P((void)); +void igmp_slowtimo __P((void)); #endif #endif diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 0bf7ed436d05..c04336eda1b1 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 - * $Id: in.h,v 1.2 1994/08/02 07:48:14 davidg Exp $ + * $Id: in.h,v 1.3 1994/08/21 05:27:27 paul Exp $ */ #ifndef _NETINET_IN_H_ @@ -55,6 +55,7 @@ #define IPPROTO_UDP 17 /* user datagram protocol */ #define IPPROTO_IDP 22 /* xns idp */ #define IPPROTO_TP 29 /* tp-4 w/ class negotiation */ +#define IPPROTO_RSVP 46 /* resource reservation */ #define IPPROTO_EON 80 /* ISO cnlp */ #define IPPROTO_ENCAP 98 /* encapsulation header */ @@ -162,6 +163,10 @@ struct ip_opts { #define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */ #define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */ #define IP_DROP_MEMBERSHIP 13 /* ip_mreq; drop an IP group membership */ +#define IP_MULTICAST_VIF 14 /* set/get IP mcast virt. iface */ +#define IP_RSVP_ON 15 /* enable RSVP in kernel */ +#define IP_RSVP_OFF 16 /* disable RSVP in kernel */ + /* * Defaults and limits for options diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index fda64a13c940..b3a8841bcfb8 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -1,115 +1,131 @@ -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 - * $Id$ - */ + /* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)in_proto.c 8.1 (Berkeley) 6/10/93 + * $Id: in_proto.c,v 1.3 1994/08/02 07:48:23 davidg Exp $ + */ -#include -#include -#include -#include -#include + #include + #include + #include + #include + #include -#include -#include -#include + #include + #include + #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -/* - * TCP/IP protocol family: IP, ICMP, UDP, TCP. - */ + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + /* + * TCP/IP protocol family: IP, ICMP, UDP, TCP. + */ -#ifdef NSIP -void idpip_input(), nsip_ctlinput(); -#endif + #ifdef NSIP + void idpip_input(), nsip_ctlinput(); + #endif -#ifdef TPIP -void tpip_input(), tpip_ctlinput(), tp_ctloutput(); -int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq(); -#endif + #ifdef TPIP + void tpip_input(), tpip_ctlinput(), tp_ctloutput(); + int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq(); + #endif -#ifdef EON -void eoninput(), eonctlinput(), eonprotoinit(); -#endif /* EON */ + #ifdef EON + void eoninput(), eonctlinput(), eonprotoinit(); + #endif /* EON */ -extern struct domain inetdomain; + #ifdef MROUTING + void multiencap_decap(struct mbuf *); + #endif -struct protosw inetsw[] = { -{ 0, &inetdomain, 0, 0, - 0, ip_output, 0, 0, - 0, - ip_init, 0, ip_slowtimo, ip_drain, ip_sysctl -}, -{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, - udp_input, 0, udp_ctlinput, ip_ctloutput, - udp_usrreq, - udp_init, 0, 0, 0, udp_sysctl -}, -{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD, - tcp_input, 0, tcp_ctlinput, tcp_ctloutput, - tcp_usrreq, - tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, -}, -{ SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, - rip_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - 0, 0, 0, 0, -}, -{ SOCK_RAW, 
&inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR, - icmp_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - 0, 0, 0, 0, icmp_sysctl -}, -{ SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, - igmp_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - igmp_init, igmp_fasttimo, 0, 0, -}, + extern struct domain inetdomain; + + struct protosw inetsw[] = { + { 0, &inetdomain, 0, 0, + 0, ip_output, 0, 0, + 0, + ip_init, 0, ip_slowtimo, ip_drain, ip_sysctl + }, + { SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, + udp_input, 0, udp_ctlinput, ip_ctloutput, + udp_usrreq, + udp_init, 0, 0, 0, udp_sysctl + }, + { SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD, + tcp_input, 0, tcp_ctlinput, tcp_ctloutput, + tcp_usrreq, + tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, + }, + { SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, + rip_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, + }, + { SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR, + icmp_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, icmp_sysctl + }, + { SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, + igmp_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + igmp_init, igmp_fasttimo, igmp_slowtimo, 0, + }, + { SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR, + rip_input, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, + }, +#ifdef MROUTING + { SOCK_RAW, &inetdomain, IPPROTO_ENCAP, PR_ATOMIC|PR_ADDR, + multiencap_decap, rip_output, 0, rip_ctloutput, + rip_usrreq, + 0, 0, 0, 0, + }, +#endif /* MROUTING */ #ifdef TPIP { SOCK_SEQPACKET,&inetdomain, IPPROTO_TP, PR_CONNREQUIRED|PR_WANTRCVD, tpip_input, 0, tpip_ctlinput, tp_ctloutput, diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 768364484799..352e19a4f7b2 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)in_var.h 8.1 (Berkeley) 6/10/93 - * $Id: in_var.h,v 1.4 1994/08/18 22:35:29 wollman Exp $ + * $Id: in_var.h,v 1.5 1994/08/21 05:27:30 paul Exp $ */ #ifndef _NETINET_IN_VAR_H_ @@ -114,6 +114,18 @@ extern struct ifqueue ipintrq; /* ip packet input queue */ } #endif +/* + * This information should be part of the ifnet structure but we don't wish + * to change that - as it might break a number of things + */ + +struct router_info { + struct ifnet *ifp; + int type; /* type of router which is querier on this interface */ + int time; /* # of slow timeouts since last old query */ + struct router_info *next; +}; + /* * Internet multicast address structure. There is one of these for each IP * multicast group to which this host belongs on a given network interface. @@ -127,6 +139,8 @@ struct in_multi { u_int inm_refcount; /* no. membership claims by sockets */ u_int inm_timer; /* IGMP membership report timer */ struct in_multi *inm_next; /* ptr to next multicast address */ + u_int inm_state; /* state of the membership */ + struct router_info *inm_rti; /* router info*/ }; #ifdef KERNEL diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 4841fb1e4bea..40dd6bf93c06 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 - * $Id: ip_input.c,v 1.3 1994/08/02 07:48:38 davidg Exp $ + * $Id: ip_input.c,v 1.4 1994/08/18 22:35:30 wollman Exp $ */ #include @@ -56,6 +56,9 @@ #include #include +#include +struct socket *ip_rsvpd; + #ifndef IPFORWARDING #ifdef GATEWAY #define IPFORWARDING 1 /* forward IP packets not for us */ @@ -237,6 +240,15 @@ next: if (hlen > sizeof (struct ip) && ip_dooptions(m)) goto next; + /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no + * matter if it is destined to another node, or whether it is + * a multicast one, RSVP wants it! and prevents it from being forwarded + * anywhere else. 
Also checks if the rsvp daemon is running before + * grabbing the packet. + */ + if (ip_rsvpd != NULL && ip->ip_p==IPPROTO_RSVP) + goto ours; + /* * Check our list of addresses, to see if the packet is for us. */ @@ -271,8 +283,6 @@ next: if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; #ifdef MROUTING - extern struct socket *ip_mrouter; - if (ip_mrouter) { /* * If we are acting as a multicast router, all @@ -287,7 +297,7 @@ next: * ip_output().) */ ip->ip_id = htons(ip->ip_id); - if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { + if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { ipstat.ips_cantforward++; m_freem(m); goto next; @@ -1168,3 +1178,25 @@ ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) } /* NOTREACHED */ } + +int +ip_rsvp_init(struct socket *so) +{ + if (so->so_type != SOCK_RAW || + so->so_proto->pr_protocol != IPPROTO_RSVP) + return EOPNOTSUPP; + + if (ip_rsvpd != NULL) + return EADDRINUSE; + + ip_rsvpd = so; + + return 0; +} + +int +ip_rsvp_done(void) +{ + ip_rsvpd = NULL; + return 0; +} diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index b07d9193e1bc..b14951d7450d 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -1,210 +1,428 @@ /* - * Copyright (c) 1989 Stephen Deering - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Stephen Deering of Stanford University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 - * $Id$ - */ - -/* - * Procedures for the kernel part of DVMRP, - * a Distance-Vector Multicast Routing Protocol. - * (See RFC-1075.) + * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. + * Modified by Mark J. 
Steiglitz, Stanford, May, 1991 + * Modified by Van Jacobson, LBL, January 1993 + * Modified by Ajit Thyagarajan, PARC, August 1993 * - * MROUTING 1.1 + * MROUTING 1.8 */ -#ifndef MROUTING -int ip_mrtproto; /* for netstat only */ -#else #include #include -#include -#include -#include #include -#include #include #include +#include +#include #include - +#include +#include #include #include #include - #include #include #include +#include #include #include -#include - #include #include #include -/* Static forwards */ -static int ip_mrouter_init __P((struct socket *)); -static int add_vif __P((struct vifctl *)); -static int del_vif __P((vifi_t *vifip)); -static int add_lgrp __P((struct lgrplctl *)); -static int del_lgrp __P((struct lgrplctl *)); -static int grplst_member __P((struct vif *, struct in_addr)); -static u_long nethash __P((struct in_addr in)); -static int add_mrt __P((struct mrtctl *)); -static int del_mrt __P((struct in_addr *)); -static struct mrt *mrtfind __P((struct in_addr)); -static void phyint_send __P((struct mbuf *, struct vif *)); -static void tunnel_send __P((struct mbuf *, struct vif *)); +#ifndef NTOHL +#if BYTE_ORDER != BIG_ENDIAN +#define NTOHL(d) ((d) = ntohl((d))) +#define NTOHS(d) ((d) = ntohs((u_short)(d))) +#define HTONL(d) ((d) = htonl((d))) +#define HTONS(d) ((d) = htons((u_short)(d))) +#else +#define NTOHL(d) +#define NTOHS(d) +#define HTONL(d) +#define HTONS(d) +#endif +#endif -#define INSIZ sizeof(struct in_addr) -#define same(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) -#define satosin(sa) ((struct sockaddr_in *)(sa)) +#ifndef MROUTING +/* + * Dummy routines and globals used when multicast routing is not compiled in. 
+ */ + +struct socket *ip_mrouter = NULL; +u_int ip_mrtproto = 0; + +int +ip_mrouter_cmd(cmd, so, m) + int cmd; + struct socket *so; + struct mbuf *m; +{ + return(EOPNOTSUPP); +} + +int +ip_mrouter_done() +{ + return(0); +} + +int +ip_mforward(ip, ifp, m) + struct ip *ip; + struct ifnet *ifp; + struct mbuf *m; +{ + return(0); +} +#else + +#define INSIZ sizeof(struct in_addr) +#define same(a1, a2) \ + (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) + +#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ /* * Globals. All but ip_mrouter and ip_mrtproto could be static, * except for netstat or debugging purposes. */ -struct socket *ip_mrouter = NULL; -int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ +struct socket *ip_mrouter = NULL; +int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ -struct mrt *mrttable[MRTHASHSIZ]; -struct vif viftable[MAXVIFS]; -struct mrtstat mrtstat; +#define NO_RTE_FOUND 0x1 +#define RTE_FOUND 0x2 + +struct mbuf *mfctable[MFCTBLSIZ]; +struct vif viftable[MAXVIFS]; +struct mrtstat mrtstat; +u_int mrtdebug = 0; /* debug level */ +u_int tbfdebug = 0; /* tbf debug level */ + +u_long timeout_val = 0; /* count of outstanding upcalls */ + +/* + * Define the token bucket filter structures + * tbftable -> each vif has one of these for storing info + * qtable -> each interface has an associated queue of pkts + */ + +struct tbf tbftable[MAXVIFS]; +struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; + +/* + * 'Interfaces' associated with decapsulator (so we can tell + * packets that went through it from ones that get reflected + * by a broken gateway). These interfaces are never linked into + * the system ifnet list & no routes point to them. I.e., packets + * can't be sent this way. They only exist as a placeholder for + * multicast source verification. 
+ */ +struct ifnet multicast_decap_if[MAXVIFS]; + +#define ENCAP_TTL 64 +#define ENCAP_PROTO 4 + +/* prototype IP hdr for encapsulated packets */ +struct ip multicast_encap_iphdr = { +#if defined(ultrix) || defined(i386) + sizeof(struct ip) >> 2, IPVERSION, +#else + IPVERSION, sizeof(struct ip) >> 2, +#endif + 0, /* tos */ + sizeof(struct ip), /* total length */ + 0, /* id */ + 0, /* frag offset */ + ENCAP_TTL, ENCAP_PROTO, + 0, /* checksum */ +}; /* * Private variables. */ -static vifi_t numvifs = 0; -static struct mrt *cached_mrt = NULL; -static u_long cached_origin; -static u_long cached_originmask; +static vifi_t numvifs = 0; + +/* + * one-back cache used by multiencap_decap to locate a tunnel's vif + * given a datagram's src ip address. + */ +static u_long last_encap_src; +static struct vif *last_encap_vif; + +static u_long nethash_fc(u_long, u_long); +static struct mfc *mfcfind(u_long, u_long); +int get_sg_cnt(struct sioc_sg_req *); +int get_vif_cnt(struct sioc_vif_req *); +int get_vifs(caddr_t); +static int add_vif(struct vifctl *); +static int del_vif(vifi_t *); +static int add_mfc(struct mfcctl *); +static int del_mfc(struct delmfcctl *); +static void cleanup_cache(void *); +static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, + struct ip_moptions *); +int legal_vif_num(int); +static void phyint_send(struct ip *, struct vif *, struct mbuf *); +static void srcrt_send(struct ip *, struct vif *, struct mbuf *); +static void encap_send(struct ip *, struct vif *, struct mbuf *); +void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, + struct ip_moptions *); +void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); +void tbf_process_q(struct vif *); +void tbf_dequeue(struct vif *, int); +void tbf_reprocess_q(void *); +int tbf_dq_sel(struct vif *, struct ip *); +void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); +void tbf_update_tokens(struct vif *); +static int priority(struct vif *, 
struct ip *); +static int ip_mrouter_init(struct socket *); + +/* + * A simple hash function: returns MFCHASHMOD of the low-order octet of + * the argument's network or subnet number and the multicast group assoc. + */ +static u_long +nethash_fc(m,n) + register u_long m; + register u_long n; +{ + struct in_addr in1; + struct in_addr in2; + + in1.s_addr = m; + m = in_netof(in1); + while ((m & 0xff) == 0) m >>= 8; + + in2.s_addr = n; + n = in_netof(in2); + while ((n & 0xff) == 0) n >>= 8; + + return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); +} + +/* + * this is a direct-mapped cache used to speed the mapping from a + * datagram source address to the associated multicast route. Note + * that unlike mrttable, the hash is on IP address, not IP net number. + */ +#define MFCHASHSIZ 1024 +#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ + ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) +struct mfc *mfchash[MFCHASHSIZ]; + +/* + * Find a route for a given origin IP address and Multicast group address + * Type of service parameter to be added in the future!!! 
+ */ +#define MFCFIND(o, g, rt) { \ + register u_int _mrhasho = o; \ + register u_int _mrhashg = g; \ + _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ + ++mrtstat.mrts_mfc_lookups; \ + rt = mfchash[_mrhasho]; \ + if ((rt == NULL) || \ + ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ + (g != rt->mfc_mcastgrp.s_addr)) \ + if ((rt = mfcfind(o, g)) != NULL) \ + mfchash[_mrhasho] = rt; \ +} + +/* + * Find route by examining hash table entries + */ +static struct mfc * +mfcfind(origin, mcastgrp) + u_long origin; + u_long mcastgrp; +{ + register struct mbuf *mb_rt; + register struct mfc *rt; + register u_long hash; + + hash = nethash_fc(origin, mcastgrp); + for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); + if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && + (mcastgrp == rt->mfc_mcastgrp.s_addr) && + (mb_rt->m_act == NULL)) + return (rt); + } + mrtstat.mrts_mfc_misses++; + return NULL; +} + +/* + * Macros to compute elapsed time efficiently + * Borrowed from Van Jacobson's scheduling code + */ +#define TV_DELTA(a, b, delta) { \ + register int xxs; \ + \ + delta = (a).tv_usec - (b).tv_usec; \ + if ((xxs = (a).tv_sec - (b).tv_sec)) { \ + switch (xxs) { \ + case 2: \ + delta += 1000000; \ + /* fall through */ \ + case 1: \ + delta += 1000000; \ + break; \ + default: \ + delta += (1000000 * xxs); \ + } \ + } \ +} + +#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ + (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) /* * Handle DVMRP setsockopt commands to modify the multicast routing tables. 
*/ int ip_mrouter_cmd(cmd, so, m) - register int cmd; - register struct socket *so; - register struct mbuf *m; + int cmd; + struct socket *so; + struct mbuf *m; { - register int error = 0; + if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; - if (cmd != DVMRP_INIT && so != ip_mrouter) - error = EACCES; - else switch (cmd) { - - case DVMRP_INIT: - error = ip_mrouter_init(so); - break; - - case DVMRP_DONE: - error = ip_mrouter_done(); - break; - - case DVMRP_ADD_VIF: - if (m == NULL || m->m_len < sizeof(struct vifctl)) - error = EINVAL; - else - error = add_vif(mtod(m, struct vifctl *)); - break; - - case DVMRP_DEL_VIF: - if (m == NULL || m->m_len < sizeof(short)) - error = EINVAL; - else - error = del_vif(mtod(m, vifi_t *)); - break; - - case DVMRP_ADD_LGRP: - if (m == NULL || m->m_len < sizeof(struct lgrplctl)) - error = EINVAL; - else - error = add_lgrp(mtod(m, struct lgrplctl *)); - break; - - case DVMRP_DEL_LGRP: - if (m == NULL || m->m_len < sizeof(struct lgrplctl)) - error = EINVAL; - else - error = del_lgrp(mtod(m, struct lgrplctl *)); - break; - - case DVMRP_ADD_MRT: - if (m == NULL || m->m_len < sizeof(struct mrtctl)) - error = EINVAL; - else - error = add_mrt(mtod(m, struct mrtctl *)); - break; - - case DVMRP_DEL_MRT: - if (m == NULL || m->m_len < sizeof(struct in_addr)) - error = EINVAL; - else - error = del_mrt(mtod(m, struct in_addr *)); - break; - - default: - error = EOPNOTSUPP; - break; - } - return (error); + switch (cmd) { + case DVMRP_INIT: return ip_mrouter_init(so); + case DVMRP_DONE: return ip_mrouter_done(); + case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); + case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); + case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); + case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); + default: return EOPNOTSUPP; + } } + +/* + * Handle ioctl commands to obtain information from the cache + */ +int +mrt_ioctl(cmd, data) + int cmd; + caddr_t data; +{ + int error 
= 0; + + switch (cmd) { + case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ + return (get_vifs(data)); + break; + case (SIOCGETVIFCNT): + return (get_vif_cnt((struct sioc_vif_req *)data)); + break; + case (SIOCGETSGCNT): + return (get_sg_cnt((struct sioc_sg_req *)data)); + break; + default: + return (EINVAL); + break; + } + return error; +} + +/* + * returns the packet count for the source group provided + */ +int +get_sg_cnt(req) + register struct sioc_sg_req *req; +{ + register struct mfc *rt; + int s; + + s = splnet(); + MFCFIND(req->src.s_addr, req->grp.s_addr, rt); + splx(s); + if (rt != NULL) + req->count = rt->mfc_pkt_cnt; + else + req->count = 0xffffffff; + + return 0; +} + +/* + * returns the input and output packet counts on the interface provided + */ +int +get_vif_cnt(req) + register struct sioc_vif_req *req; +{ + register vifi_t vifi = req->vifi; + + req->icount = viftable[vifi].v_pkt_in; + req->ocount = viftable[vifi].v_pkt_out; + + return 0; +} + +int +get_vifs(data) + char *data; +{ + struct vif_conf *vifc = (struct vif_conf *)data; + struct vif_req *vifrp, vifr; + int space, error=0; + + vifi_t vifi; + int s; + + space = vifc->vifc_len; + vifrp = vifc->vifc_req; + + s = splnet(); + vifc->vifc_num=numvifs; + + for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { + if (viftable[vifi].v_lcl_addr.s_addr != 0) { + vifr.v_flags=viftable[vifi].v_flags; + vifr.v_threshold=viftable[vifi].v_threshold; + vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; + vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; + strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); + if ((space -= sizeof(vifr)) < 0) { + splx(s); + return(ENOSPC); + } + error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); + if (error) { + splx(s); + return(error); + } + } + } + splx(s); + return 0; +} /* * Enable multicast routing */ static int ip_mrouter_init(so) - register struct socket *so; + struct socket *so; { - if (so->so_type != SOCK_RAW || - so->so_proto->pr_protocol != 
IPPROTO_IGMP) - return (EOPNOTSUPP); + if (so->so_type != SOCK_RAW || + so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; - if (ip_mrouter != NULL) - return (EADDRINUSE); + if (ip_mrouter != NULL) return EADDRINUSE; - ip_mrouter = so; + ip_mrouter = so; - return (0); + if (mrtdebug) + log(LOG_DEBUG, "ip_mrouter_init"); + + return 0; } /* @@ -213,45 +431,82 @@ ip_mrouter_init(so) int ip_mrouter_done() { - register vifi_t vifi; - register int i; - register struct ifnet *ifp; - register int s; - struct ifreq ifr; + vifi_t vifi; + int i; + struct ifnet *ifp; + struct ifreq ifr; + struct mbuf *mb_rt; + struct mbuf *m; + struct rtdetq *rte; + int s; - s = splnet(); + s = splnet(); - /* - * For each phyint in use, free its local group list and - * disable promiscuous reception of all IP multicasts. - */ - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_lcl_addr.s_addr != 0 && - !(viftable[vifi].v_flags & VIFF_TUNNEL)) { - if (viftable[vifi].v_lcl_grps) - free(viftable[vifi].v_lcl_grps, M_MRTABLE); - satosin(&ifr.ifr_addr)->sin_family = AF_INET; - satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; - ifp = viftable[vifi].v_ifp; - (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); - } + /* + * For each phyint in use, disable promiscuous reception of all IP + * multicasts. + */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_lcl_addr.s_addr != 0 && + !(viftable[vifi].v_flags & VIFF_TUNNEL)) { + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr + = INADDR_ANY; + ifp = viftable[vifi].v_ifp; + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); } - bzero((caddr_t)viftable, sizeof(viftable)); - numvifs = 0; + } + bzero((caddr_t)qtable, sizeof(qtable)); + bzero((caddr_t)tbftable, sizeof(tbftable)); + bzero((caddr_t)viftable, sizeof(viftable)); + numvifs = 0; - /* - * Free any multicast route entries. 
- */ - for (i = 0; i < MRTHASHSIZ; i++) - if (mrttable[i]) - free(mrttable[i], M_MRTABLE); - bzero((caddr_t)mrttable, sizeof(mrttable)); - cached_mrt = NULL; + /* + * Check if any outstanding timeouts remain + */ + if (timeout_val != 0) + for (i = 0; i < MFCTBLSIZ; i++) { + mb_rt = mfctable[i]; + while (mb_rt) { + if ( mb_rt->m_act != NULL) { + untimeout(cleanup_cache, (caddr_t)mb_rt); + while (m = mb_rt->m_act) { + mb_rt->m_act = m->m_act; + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); + } + timeout_val--; + } + mb_rt = mb_rt->m_next; + } + if (timeout_val == 0) + break; + } - ip_mrouter = NULL; + /* + * Free all multicast forwarding cache entries. + */ + for (i = 0; i < MFCTBLSIZ; i++) + m_freem(mfctable[i]); - splx(s); - return (0); + bzero((caddr_t)mfctable, sizeof(mfctable)); + bzero((caddr_t)mfchash, sizeof(mfchash)); + + /* + * Reset de-encapsulation cache + */ + last_encap_src = NULL; + last_encap_vif = NULL; + + ip_mrouter = NULL; + + splx(s); + + if (mrtdebug) + log(LOG_DEBUG, "ip_mrouter_done"); + + return 0; } /* @@ -259,61 +514,85 @@ ip_mrouter_done() */ static int add_vif(vifcp) - register struct vifctl *vifcp; + register struct vifctl *vifcp; { - register struct vif *vifp = viftable + vifcp->vifc_vifi; - register struct ifaddr *ifa; - register struct ifnet *ifp; - struct ifreq ifr; - register int error, s; - static struct sockaddr_in sin = { sizeof(sin), AF_INET }; + register struct vif *vifp = viftable + vifcp->vifc_vifi; + static struct sockaddr_in sin = {AF_INET}; + struct ifaddr *ifa; + struct ifnet *ifp; + struct ifreq ifr; + int error, s; + struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; - if (vifcp->vifc_vifi >= MAXVIFS) - return (EINVAL); - if (vifp->v_lcl_addr.s_addr != 0) - return (EADDRINUSE); + if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; + if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; - /* Find the interface with an address in AF_INET family */ - sin.sin_addr = vifcp->vifc_lcl_addr; - ifa = 
ifa_ifwithaddr((struct sockaddr *)&sin); - if (ifa == 0) - return (EADDRNOTAVAIL); + /* Find the interface with an address in AF_INET family */ + sin.sin_addr = vifcp->vifc_lcl_addr; + ifa = ifa_ifwithaddr((struct sockaddr *)&sin); + if (ifa == 0) return EADDRNOTAVAIL; + ifp = ifa->ifa_ifp; - s = splnet(); - - if (vifcp->vifc_flags & VIFF_TUNNEL) - vifp->v_rmt_addr = vifcp->vifc_rmt_addr; - else { - /* Make sure the interface supports multicast */ - ifp = ifa->ifa_ifp; - if ((ifp->if_flags & IFF_MULTICAST) == 0) { - splx(s); - return (EOPNOTSUPP); - } - /* - * Enable promiscuous reception of all IP multicasts - * from the interface. - */ - satosin(&ifr.ifr_addr)->sin_family = AF_INET; - satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; - error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); - if (error) { - splx(s); - return (error); + if (vifcp->vifc_flags & VIFF_TUNNEL) { + if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { + static int inited = 0; + if(!inited) { + for (s = 0; s < MAXVIFS; ++s) { + multicast_decap_if[s].if_name = "mdecap"; + multicast_decap_if[s].if_unit = s; } + inited = 1; + } + ifp = &multicast_decap_if[vifcp->vifc_vifi]; + } else { + ifp = 0; } + } else { + /* Make sure the interface supports multicast */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) + return EOPNOTSUPP; - vifp->v_flags = vifcp->vifc_flags; - vifp->v_threshold = vifcp->vifc_threshold; - vifp->v_lcl_addr = vifcp->vifc_lcl_addr; - vifp->v_ifp = ifa->ifa_ifp; - - /* Adjust numvifs up if the vifi is higher than numvifs */ - if (numvifs <= vifcp->vifc_vifi) - numvifs = vifcp->vifc_vifi + 1; - + /* Enable promiscuous reception of all IP multicasts from the if */ + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; + s = splnet(); + error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); splx(s); - return (0); + if (error) + return error; + } + + s = splnet(); + /* define parameters for the 
tbf structure */ + vifp->v_tbf = v_tbf; + vifp->v_tbf->q_len = 0; + vifp->v_tbf->n_tok = 0; + vifp->v_tbf->last_pkt_t = 0; + + vifp->v_flags = vifcp->vifc_flags; + vifp->v_threshold = vifcp->vifc_threshold; + vifp->v_lcl_addr = vifcp->vifc_lcl_addr; + vifp->v_rmt_addr = vifcp->vifc_rmt_addr; + vifp->v_ifp = ifp; + vifp->v_rate_limit= vifcp->vifc_rate_limit; + /* initialize per vif pkt counters */ + vifp->v_pkt_in = 0; + vifp->v_pkt_out = 0; + splx(s); + + /* Adjust numvifs up if the vifi is higher than numvifs */ + if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; + + if (mrtdebug) + log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", + vifcp->vifc_vifi, + ntohl(vifcp->vifc_lcl_addr.s_addr), + (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", + ntohl(vifcp->vifc_rmt_addr.s_addr), + vifcp->vifc_threshold, + vifcp->vifc_rate_limit); + + return 0; } /* @@ -321,296 +600,258 @@ add_vif(vifcp) */ static int del_vif(vifip) - register vifi_t *vifip; + vifi_t *vifip; { - register struct vif *vifp = viftable + *vifip; - register struct ifnet *ifp; - register int i, s; - struct ifreq ifr; + register struct vif *vifp = viftable + *vifip; + register vifi_t vifi; + struct ifnet *ifp; + struct ifreq ifr; + int s; - if (*vifip >= numvifs) - return (EINVAL); - if (vifp->v_lcl_addr.s_addr == 0) - return (EADDRNOTAVAIL); + if (*vifip >= numvifs) return EINVAL; + if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; - s = splnet(); + s = splnet(); - if (!(vifp->v_flags & VIFF_TUNNEL)) { - if (vifp->v_lcl_grps) - free(vifp->v_lcl_grps, M_MRTABLE); - satosin(&ifr.ifr_addr)->sin_family = AF_INET; - satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY; - ifp = vifp->v_ifp; - (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); - } + if (!(vifp->v_flags & VIFF_TUNNEL)) { + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; + ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; + ifp = vifp->v_ifp; + (*ifp->if_ioctl)(ifp, 
SIOCDELMULTI, (caddr_t)&ifr); + } - bzero((caddr_t)vifp, sizeof (*vifp)); + if (vifp == last_encap_vif) { + last_encap_vif = 0; + last_encap_src = 0; + } - /* Adjust numvifs down */ - for (i = numvifs - 1; i >= 0; i--) - if (viftable[i].v_lcl_addr.s_addr != 0) - break; - numvifs = i + 1; + bzero((caddr_t)qtable[*vifip], + sizeof(qtable[*vifip])); + bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); + bzero((caddr_t)vifp, sizeof (*vifp)); - splx(s); - return (0); + /* Adjust numvifs down */ + for (vifi = numvifs; vifi > 0; vifi--) + if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; + numvifs = vifi; + + splx(s); + + if (mrtdebug) + log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); + + return 0; } /* - * Add the multicast group in the lgrpctl to the list of local multicast - * group memberships associated with the vif indexed by gcp->lgc_vifi. + * Add an mfc entry */ static int -add_lgrp(gcp) - register struct lgrplctl *gcp; +add_mfc(mfccp) + struct mfcctl *mfccp; { - register struct vif *vifp; - register int s; + struct mfc *rt; + struct mfc *rt1; + register struct mbuf *mb_rt; + struct mbuf *prev_mb_rt; + u_long hash; + struct mbuf *mb_ntry; + struct rtdetq *rte; + register u_short nstl; + int s; + int i; - if (gcp->lgc_vifi >= numvifs) - return (EINVAL); + rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); - vifp = viftable + gcp->lgc_vifi; - if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL)) - return (EADDRNOTAVAIL); + /* If an entry already exists, just update the fields */ + if (rt) { + if (mrtdebug) + log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + ntohl(mfccp->mfcc_originmask.s_addr), + mfccp->mfcc_parent); - /* If not enough space in existing list, allocate a larger one */ s = splnet(); - if (vifp->v_lcl_grps_n + 1 >= vifp->v_lcl_grps_max) { - register int num; - register struct in_addr *ip; + rt->mfc_parent = mfccp->mfcc_parent; + 
for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + splx(s); + return 0; + } - num = vifp->v_lcl_grps_max; - if (num <= 0) - num = 32; /* initial number */ - else - num += num; /* double last number */ - ip = (struct in_addr *)malloc(num * sizeof(*ip), - M_MRTABLE, M_NOWAIT); - if (ip == NULL) { - splx(s); - return (ENOBUFS); - } + /* + * Find the entry for which the upcall was made and update + */ + s = splnet(); + hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; + mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { - bzero((caddr_t)ip, num * sizeof(*ip)); /* XXX paranoid */ - bcopy((caddr_t)vifp->v_lcl_grps, (caddr_t)ip, - vifp->v_lcl_grps_n * sizeof(*ip)); + rt = mtod(mb_rt, struct mfc *); + if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) + == mfccp->mfcc_origin.s_addr) && + (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && + (mb_rt->m_act != NULL)) { - vifp->v_lcl_grps_max = num; - if (vifp->v_lcl_grps) - free(vifp->v_lcl_grps, M_MRTABLE); - vifp->v_lcl_grps = ip; + if (!nstl++) { + if (mrtdebug) + log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + ntohl(mfccp->mfcc_originmask.s_addr), + mfccp->mfcc_parent, mb_rt->m_act); + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_originmask = mfccp->mfcc_originmask; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + rt1 = rt; + } + + /* prevent cleanup of cache entry */ + untimeout(cleanup_cache, (caddr_t)mb_rt); + timeout_val--; + + /* free packets Qed at the end of this entry */ + while (mb_rt->m_act) { + mb_ntry = mb_rt->m_act; + rte = mtod(mb_ntry, struct rtdetq *); + ip_mdq(rte->m, rte->ifp, rte->tunnel_src, + rt1, rte->imo); + 
mb_rt->m_act = mb_ntry->m_act; + m_freem(rte->m); + m_free(mb_ntry); + } + + /* + * If more than one entry was created for a single upcall + * delete that entry + */ + if (nstl > 1) { + MFREE(mb_rt, prev_mb_rt->m_next); + mb_rt = prev_mb_rt; + } + } + } + + /* + * It is possible that an entry is being inserted without an upcall + */ + if (nstl == 0) { + if (mrtdebug) + log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x", + hash, ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + ntohl(mfccp->mfcc_originmask.s_addr), + mfccp->mfcc_parent); + + for (prev_mb_rt = mb_rt = mfctable[hash]; + mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + + rt = mtod(mb_rt, struct mfc *); + if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) + == mfccp->mfcc_origin.s_addr) && + (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { + + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_originmask = mfccp->mfcc_originmask; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + } + } + if (mb_rt == NULL) { + /* no upcall, so make a new entry */ + MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); + if (mb_rt == NULL) { splx(s); + return ENOBUFS; + } + + rt = mtod(mb_rt, struct mfc *); + + /* insert new entry at head of hash chain */ + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_originmask = mfccp->mfcc_originmask; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + + /* link into table */ + mb_rt->m_next = mfctable[hash]; + mfctable[hash] = mb_rt; + mb_rt->m_act = NULL; } - - vifp->v_lcl_grps[vifp->v_lcl_grps_n++] = gcp->lgc_gaddr; - - if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group) - 
vifp->v_cached_result = 1; - - splx(s); - return (0); + } + splx(s); + return 0; } /* - * Delete the the local multicast group associated with the vif - * indexed by gcp->lgc_vifi. - */ - -static int -del_lgrp(gcp) - register struct lgrplctl *gcp; -{ - register struct vif *vifp; - register int i, error, s; - - if (gcp->lgc_vifi >= numvifs) - return (EINVAL); - vifp = viftable + gcp->lgc_vifi; - if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL)) - return (EADDRNOTAVAIL); - - s = splnet(); - - if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group) - vifp->v_cached_result = 0; - - error = EADDRNOTAVAIL; - for (i = 0; i < vifp->v_lcl_grps_n; ++i) - if (same(&gcp->lgc_gaddr, &vifp->v_lcl_grps[i])) { - error = 0; - vifp->v_lcl_grps_n--; - bcopy((caddr_t)&vifp->v_lcl_grps[i + 1], - (caddr_t)&vifp->v_lcl_grps[i], - (vifp->v_lcl_grps_n - i) * sizeof(struct in_addr)); - error = 0; - break; - } - - splx(s); - return (error); -} - -/* - * Return 1 if gaddr is a member of the local group list for vifp. 
+ * Delete an mfc entry */ static int -grplst_member(vifp, gaddr) - register struct vif *vifp; - struct in_addr gaddr; +del_mfc(mfccp) + struct delmfcctl *mfccp; { - register int i, s; - register u_long addr; + struct in_addr origin; + struct in_addr mcastgrp; + struct mfc *rt; + struct mbuf *mb_rt; + struct mbuf *prev_mb_rt; + u_long hash; + struct mfc **cmfc; + struct mfc **cmfcend; + int s, i; - mrtstat.mrts_grp_lookups++; + origin = mfccp->mfcc_origin; + mcastgrp = mfccp->mfcc_mcastgrp; + hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); - addr = gaddr.s_addr; - if (addr == vifp->v_cached_group) - return (vifp->v_cached_result); + if (mrtdebug) + log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", + ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); - mrtstat.mrts_grp_misses++; + for (prev_mb_rt = mb_rt = mfctable[hash] + ; mb_rt + ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); + if (origin.s_addr == rt->mfc_origin.s_addr && + mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && + mb_rt->m_act == NULL) + break; + } + if (mb_rt == NULL) { + return ESRCH; + } - for (i = 0; i < vifp->v_lcl_grps_n; ++i) - if (addr == vifp->v_lcl_grps[i].s_addr) { - s = splnet(); - vifp->v_cached_group = addr; - vifp->v_cached_result = 1; - splx(s); - return (1); - } - s = splnet(); - vifp->v_cached_group = addr; - vifp->v_cached_result = 0; - splx(s); - return (0); -} + s = splnet(); -/* - * A simple hash function: returns MRTHASHMOD of the low-order octet of - * the argument's network or subnet number. 
- */ -static u_long -nethash(in) - struct in_addr in; -{ - register u_long n; + cmfc = mfchash; + cmfcend = cmfc + MFCHASHSIZ; + for ( ; cmfc < cmfcend; ++cmfc) + if (*cmfc == rt) + *cmfc = 0; - n = in_netof(in); - while ((n & 0xff) == 0) - n >>= 8; - return (MRTHASHMOD(n)); -} + if (prev_mb_rt != mb_rt) { /* if moved past head of list */ + MFREE(mb_rt, prev_mb_rt->m_next); + } else /* delete head of list, it is in the table */ + mfctable[hash] = m_free(mb_rt); -/* - * Add an mrt entry - */ -static int -add_mrt(mrtcp) - register struct mrtctl *mrtcp; -{ - struct mrt *rt; - u_long hash; - int s; + splx(s); - if (rt = mrtfind(mrtcp->mrtc_origin)) { - /* Just update the route */ - s = splnet(); - rt->mrt_parent = mrtcp->mrtc_parent; - VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children); - VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves); - splx(s); - return (0); - } - - s = splnet(); - - rt = (struct mrt *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); - if (rt == NULL) { - splx(s); - return (ENOBUFS); - } - - /* - * insert new entry at head of hash chain - */ - rt->mrt_origin = mrtcp->mrtc_origin; - rt->mrt_originmask = mrtcp->mrtc_originmask; - rt->mrt_parent = mrtcp->mrtc_parent; - VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children); - VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves); - /* link into table */ - hash = nethash(mrtcp->mrtc_origin); - rt->mrt_next = mrttable[hash]; - mrttable[hash] = rt; - - splx(s); - return (0); -} - -/* - * Delete an mrt entry - */ -static int -del_mrt(origin) - register struct in_addr *origin; -{ - register struct mrt *rt, *prev_rt; - register u_long hash = nethash(*origin); - register int s; - - for (prev_rt = rt = mrttable[hash]; rt; prev_rt = rt, rt = rt->mrt_next) - if (origin->s_addr == rt->mrt_origin.s_addr) - break; - if (!rt) - return (ESRCH); - - s = splnet(); - - if (rt == cached_mrt) - cached_mrt = NULL; - - if (prev_rt == rt) - mrttable[hash] = rt->mrt_next; - else - prev_rt->mrt_next = rt->mrt_next; - free(rt, M_MRTABLE); - - 
splx(s); - return (0); -} - -/* - * Find a route for a given origin IP address. - */ -static struct mrt * -mrtfind(origin) - struct in_addr origin; -{ - register struct mrt *rt; - register u_int hash; - register int s; - - mrtstat.mrts_mrt_lookups++; - - if (cached_mrt != NULL && - (origin.s_addr & cached_originmask) == cached_origin) - return (cached_mrt); - - mrtstat.mrts_mrt_misses++; - - hash = nethash(origin); - for (rt = mrttable[hash]; rt; rt = rt->mrt_next) - if ((origin.s_addr & rt->mrt_originmask.s_addr) == - rt->mrt_origin.s_addr) { - s = splnet(); - cached_mrt = rt; - cached_origin = rt->mrt_origin.s_addr; - cached_originmask = rt->mrt_originmask.s_addr; - splx(s); - return (rt); - } - return (NULL); + return 0; } /* @@ -628,209 +869,914 @@ mrtfind(origin) #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ int -ip_mforward(m, ifp) - register struct mbuf *m; - register struct ifnet *ifp; +ip_mforward(ip, ifp, m, imo) + struct mbuf *m; + register struct ip *ip; + struct ifnet *ifp; + struct ip_moptions *imo; { - register struct ip *ip = mtod(m, struct ip *); - register struct mrt *rt; - register struct vif *vifp; - register int vifi; - register u_char *ipoptions; - u_long tunnel_src; + register struct mfc *rt; + register struct vif *vifp; + register u_char *ipoptions; + u_long tunnel_src; + static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; + static struct sockaddr_in k_igmpsrc = { AF_INET }; + static struct sockaddr_in k_igmpdst = { AF_INET }; + register struct mbuf *mm; + register struct mbuf *mn; + register struct ip *k_data; + int s; - if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || - (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { - /* - * Packet arrived via a physical interface. - */ - tunnel_src = 0; - } else { - /* - * Packet arrived through a tunnel. 
- * - * A tunneled packet has a single NOP option and a - * two-element loose-source-and-record-route (LSRR) - * option immediately following the fixed-size part of - * the IP header. At this point in processing, the IP - * header should contain the following IP addresses: - * - * original source - in the source address field - * destination group - in the destination address field - * remote tunnel end-point - in the first element of LSRR - * one of this host's addrs - in the second element of LSRR - * - * NOTE: RFC-1075 would have the original source and - * remote tunnel end-point addresses swapped. However, - * that could cause delivery of ICMP error messages to - * innocent applications on intermediate routing - * hosts! Therefore, we hereby change the spec. - */ - - /* - * Verify that the tunnel options are well-formed. - */ - if (ipoptions[0] != IPOPT_NOP || - ipoptions[2] != 11 || /* LSRR option length */ - ipoptions[3] != 12 || /* LSRR address pointer */ - (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { - mrtstat.mrts_bad_tunnel++; - return (1); - } - - /* - * Delete the tunnel options from the packet. - */ - ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, - (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); - m->m_len -= TUNNEL_LEN; - ip->ip_len -= TUNNEL_LEN; - ip->ip_hl -= TUNNEL_LEN >> 2; - } + if (mrtdebug > 1) + log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); + if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || + (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* - * Don't forward a packet with time-to-live of zero or one, - * or a packet destined to a local-only group. + * Packet arrived via a physical interface. */ - if (ip->ip_ttl <= 1 || - ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) - return ((int)tunnel_src); - + tunnel_src = 0; + } else { /* - * Don't forward if we don't have a route for the packet's origin. 
- */ - if (!(rt = mrtfind(ip->ip_src))) { - mrtstat.mrts_no_route++; - return ((int)tunnel_src); - } - - /* - * Don't forward if it didn't arrive from the parent vif for its origin. - */ - vifi = rt->mrt_parent; - if (tunnel_src == 0 ) { - if ((viftable[vifi].v_flags & VIFF_TUNNEL) || - viftable[vifi].v_ifp != ifp ) - return ((int)tunnel_src); - } else { - if (!(viftable[vifi].v_flags & VIFF_TUNNEL) || - viftable[vifi].v_rmt_addr.s_addr != tunnel_src ) - return ((int)tunnel_src); - } - - /* - * For each vif, decide if a copy of the packet should be forwarded. - * Forward if: - * - the ttl exceeds the vif's threshold AND - * - the vif is a child in the origin's route AND - * - ( the vif is not a leaf in the origin's route OR - * the destination group has members on the vif ) + * Packet arrived through a source-route tunnel. * - * (This might be speeded up with some sort of cache -- someday.) + * A source-route tunneled packet has a single NOP option and a + * two-element + * loose-source-and-record-route (LSRR) option immediately following + * the fixed-size part of the IP header. At this point in processing, + * the IP header should contain the following IP addresses: + * + * original source - in the source address field + * destination group - in the destination address field + * remote tunnel end-point - in the first element of LSRR + * one of this host's addrs - in the second element of LSRR + * + * NOTE: RFC-1075 would have the original source and remote tunnel + * end-point addresses swapped. However, that could cause + * delivery of ICMP error messages to innocent applications + * on intermediate routing hosts! Therefore, we hereby + * change the spec. 
*/ - for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) { - if (ip->ip_ttl > vifp->v_threshold && - VIFM_ISSET(vifi, rt->mrt_children) && - (!VIFM_ISSET(vifi, rt->mrt_leaves) || - grplst_member(vifp, ip->ip_dst))) { - if (vifp->v_flags & VIFF_TUNNEL) - tunnel_send(m, vifp); - else - phyint_send(m, vifp); - } + + /* + * Verify that the tunnel options are well-formed. + */ + if (ipoptions[0] != IPOPT_NOP || + ipoptions[2] != 11 || /* LSRR option length */ + ipoptions[3] != 12 || /* LSRR address pointer */ + (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { + mrtstat.mrts_bad_tunnel++; + if (mrtdebug) + log(LOG_DEBUG, + "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)", + ntohl(ip->ip_src.s_addr), + ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], + *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); + return 1; } - return ((int)tunnel_src); + /* + * Delete the tunnel options from the packet. + */ + ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, + (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); + m->m_len -= TUNNEL_LEN; + ip->ip_len -= TUNNEL_LEN; + ip->ip_hl -= TUNNEL_LEN >> 2; + + ifp = 0; + } + + /* + * Don't forward a packet with time-to-live of zero or one, + * or a packet destined to a local-only group. 
+ */ + if (ip->ip_ttl <= 1 || + ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) + return (int)tunnel_src; + + /* + * Determine forwarding vifs from the forwarding cache table + */ + s = splnet(); + MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); + + /* Entry exists, so forward if necessary */ + if (rt != NULL) { + splx(s); + return (ip_mdq(m, ifp, tunnel_src, rt, imo)); + } + + else { + /* + * If we don't have a route for packet's origin, + * Make a copy of the packet & + * send message to routing daemon + */ + + register struct mbuf *mb_rt; + register struct mbuf *mb_ntry; + register struct mbuf *mb0; + register struct rtdetq *rte; + register struct mbuf *rte_m; + register u_long hash; + register struct timeval tp; + + mrtstat.mrts_no_route++; + if (mrtdebug) + log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", + ntohl(ip->ip_src.s_addr), + ntohl(ip->ip_dst.s_addr)); + + /* is there an upcall waiting for this packet? */ + hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); + for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); + if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == + rt->mfc_origin.s_addr) && + (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && + (mb_rt->m_act != NULL)) + break; + } + + if (mb_rt == NULL) { + /* no upcall, so make a new entry */ + MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); + if (mb_rt == NULL) { + splx(s); + return ENOBUFS; + } + + rt = mtod(mb_rt, struct mfc *); + + /* insert new entry at head of hash chain */ + rt->mfc_origin.s_addr = ip->ip_src.s_addr; + rt->mfc_originmask.s_addr = (u_long)0xffffffff; + rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; + + /* link into table */ + hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); + mb_rt->m_next = mfctable[hash]; + mfctable[hash] = mb_rt; + mb_rt->m_act = NULL; + + } + + /* determine if q has overflowed */ + for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) + hash++; + + if (hash > MAX_UPQ) { + 
mrtstat.mrts_upq_ovflw++; + splx(s); + return 0; + } + + /* add this packet and timing, ifp info to m_act */ + MGET(mb_ntry, M_DONTWAIT, MT_DATA); + if (mb_ntry == NULL) { + splx(s); + return ENOBUFS; + } + + mb_ntry->m_act = NULL; + rte = mtod(mb_ntry, struct rtdetq *); + + mb0 = m_copy(m, 0, M_COPYALL); + if (mb0 == NULL) { + splx(s); + return ENOBUFS; + } + + rte->m = mb0; + rte->ifp = ifp; + rte->tunnel_src = tunnel_src; + rte->imo = imo; + + rte_m->m_act = mb_ntry; + + splx(s); + + if (hash == 0) { + /* + * Send message to routing daemon to install + * a route into the kernel table + */ + k_igmpsrc.sin_addr = ip->ip_src; + k_igmpdst.sin_addr = ip->ip_dst; + + mm = m_copy(m, 0, M_COPYALL); + if (mm == NULL) { + splx(s); + return ENOBUFS; + } + + k_data = mtod(mm, struct ip *); + k_data->ip_p = 0; + + mrtstat.mrts_upcalls++; + + raw_input(mm, &k_igmpproto, + (struct sockaddr *)&k_igmpsrc, + (struct sockaddr *)&k_igmpdst); + + /* set timer to cleanup entry if upcall is lost */ + timeout(cleanup_cache, (caddr_t)mb_rt, 100); + timeout_val++; + } + + return 0; + } +} + +/* + * Clean up the cache entry if upcall is not serviced + */ +static void +cleanup_cache(xmb_rt) + void *xmb_rt; +{ + struct mbuf *mb_rt = xmb_rt; + struct mfc *rt; + u_long hash; + struct mbuf *prev_m0; + struct mbuf *m0; + struct mbuf *m; + struct rtdetq *rte; + int s; + + rt = mtod(mb_rt, struct mfc *); + hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); + + if (mrtdebug) + log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x", + ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr), + ntohl(rt->mfc_mcastgrp.s_addr)); + + mrtstat.mrts_cache_cleanups++; + + /* + * determine entry to be cleaned up in cache table + */ + s = splnet(); + for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next) + if (m0 == mb_rt) + break; + + /* + * drop all the packets + * free the mbuf with the pkt, if, timing info + */ + while (mb_rt->m_act) { + m = mb_rt->m_act; + mb_rt->m_act = 
m->m_act; + + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); + } + + /* + * Delete the entry from the cache + */ + if (prev_m0 != m0) { /* if moved past head of list */ + MFREE(m0, prev_m0->m_next); + } else /* delete head of list, it is in the table */ + mfctable[hash] = m_free(m0); + + timeout_val--; + splx(s); +} + +/* + * Packet forwarding routine once entry in the cache is made + */ +static int +ip_mdq(m, ifp, tunnel_src, rt, imo) + register struct mbuf *m; + register struct ifnet *ifp; + register u_long tunnel_src; + register struct mfc *rt; + register struct ip_moptions *imo; +{ + register struct ip *ip = mtod(m, struct ip *); + register vifi_t vifi; + register struct vif *vifp; + + /* + * Don't forward if it didn't arrive from the parent vif for its origin. + * Notes: v_ifp is zero for src route tunnels, multicast_decap_if + * for encapsulated tunnels and a real ifnet for non-tunnels so + * the first part of the if catches wrong physical interface or + * tunnel type; v_rmt_addr is zero for non-tunneled packets so + * the 2nd part catches both packets that arrive via a tunnel + * that shouldn't and packets that arrive via the wrong tunnel. + */ + vifi = rt->mfc_parent; + if (viftable[vifi].v_ifp != ifp || + (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { + /* came in the wrong interface */ + if (mrtdebug) + log(LOG_DEBUG, "wrong if: ifp %x vifi %d", + ifp, vifi); + ++mrtstat.mrts_wrong_if; + return (int)tunnel_src; + } + + /* increment the interface and s-g counters */ + viftable[vifi].v_pkt_in++; + rt->mfc_pkt_cnt++; + + /* + * For each vif, decide if a copy of the packet should be forwarded. 
+ * Forward if: + * - the ttl exceeds the vif's threshold + * - there are group members downstream on interface + */ +#define MC_SEND(ip,vifp,m) { \ + (vifp)->v_pkt_out++; \ + if ((vifp)->v_flags & VIFF_SRCRT) \ + srcrt_send((ip), (vifp), (m)); \ + else if ((vifp)->v_flags & VIFF_TUNNEL) \ + encap_send((ip), (vifp), (m)); \ + else \ + phyint_send((ip), (vifp), (m)); \ + } + +/* If no options or the imo_multicast_vif option is 0, don't do this part + */ + if ((imo != NULL) && + (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) + { + MC_SEND(ip,viftable+vifi,m); + return (1); /* make sure we are done: No more physical sends */ + } + + for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) + if ((rt->mfc_ttls[vifi] > 0) && + (ip->ip_ttl > rt->mfc_ttls[vifi])) + MC_SEND(ip, vifp, m); + + return 0; +} + +/* check if a vif number is legal/ok. This is used by ip_output, to export + * numvifs there, + */ +int +legal_vif_num(vif) + int vif; +{ if (vif>=0 && vif<=numvifs) + return(1); + else + return(0); } static void -phyint_send(m, vifp) - register struct mbuf *m; - register struct vif *vifp; +phyint_send(ip, vifp, m) + struct ip *ip; + struct vif *vifp; + struct mbuf *m; { - register struct ip *ip = mtod(m, struct ip *); - register struct mbuf *mb_copy; - register struct ip_moptions *imo; - register int error; - struct ip_moptions simo; + register struct mbuf *mb_copy; + register struct mbuf *mopts; + register struct ip_moptions *imo; - mb_copy = m_copy(m, 0, M_COPYALL); - if (mb_copy == NULL) - return; + if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) + return; - imo = &simo; - imo->imo_multicast_ifp = vifp->v_ifp; - imo->imo_multicast_ttl = ip->ip_ttl - 1; - imo->imo_multicast_loop = 1; + MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); + if (imo == NULL) { + m_freem(mb_copy); + return; + } - error = ip_output(mb_copy, NULL, NULL, IP_FORWARDING, imo); + imo->imo_multicast_ifp = vifp->v_ifp; + imo->imo_multicast_ttl = 
ip->ip_ttl - 1; + imo->imo_multicast_loop = 1; + + if (vifp->v_rate_limit <= 0) + tbf_send_packet(vifp, mb_copy, imo); + else + tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, + imo); } static void -tunnel_send(m, vifp) - register struct mbuf *m; - register struct vif *vifp; +srcrt_send(ip, vifp, m) + struct ip *ip; + struct vif *vifp; + struct mbuf *m; { - register struct ip *ip = mtod(m, struct ip *); - register struct mbuf *mb_copy, *mb_opts; - register struct ip *ip_copy; - register int error; - register u_char *cp; + struct mbuf *mb_copy, *mb_opts; + register struct ip *ip_copy; + u_char *cp; - /* - * Make sure that adding the tunnel options won't exceed the - * maximum allowed number of option bytes. - */ - if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { - mrtstat.mrts_cant_tunnel++; - return; - } + /* + * Make sure that adding the tunnel options won't exceed the + * maximum allowed number of option bytes. + */ + if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { + mrtstat.mrts_cant_tunnel++; + if (mrtdebug) + log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u", + ntohl(ip->ip_src.s_addr)); + return; + } - /* - * Get a private copy of the IP header so that changes to some - * of the IP fields don't damage the original header, which is - * examined later in ip_input.c. - */ - mb_copy = m_copy(m, IP_HDR_LEN, M_COPYALL); - if (mb_copy == NULL) - return; - MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); - if (mb_opts == NULL) { - m_freem(mb_copy); - return; - } - /* - * Make mb_opts be the new head of the packet chain. - * Any options of the packet were left in the old packet chain head - */ - mb_opts->m_next = mb_copy; - mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; - mb_opts->m_data += MSIZE - mb_opts->m_len; + if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) + return; - ip_copy = mtod(mb_opts, struct ip *); - /* - * Copy the base ip header to the new head mbuf. 
- */ - *ip_copy = *ip; - ip_copy->ip_ttl--; - ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ - /* - * Adjust the ip header length to account for the tunnel options. - */ - ip_copy->ip_hl += TUNNEL_LEN >> 2; - ip_copy->ip_len += TUNNEL_LEN; - /* - * Add the NOP and LSRR after the base ip header - */ - cp = (u_char *)(ip_copy + 1); - *cp++ = IPOPT_NOP; - *cp++ = IPOPT_LSRR; - *cp++ = 11; /* LSRR option length */ - *cp++ = 8; /* LSSR pointer to second element */ - *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ - cp += 4; - *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ + ip_copy = mtod(mb_copy, struct ip *); + ip_copy->ip_ttl--; + ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ + /* + * Adjust the ip header length to account for the tunnel options. + */ + ip_copy->ip_hl += TUNNEL_LEN >> 2; + ip_copy->ip_len += TUNNEL_LEN; + MGET(mb_opts, M_DONTWAIT, MT_HEADER); + if (mb_opts == NULL) { + m_freem(mb_copy); + return; + } + /* + * 'Delete' the base ip header from the mb_copy chain + */ + mb_copy->m_len -= IP_HDR_LEN; + mb_copy->m_data += IP_HDR_LEN; + /* + * Make mb_opts be the new head of the packet chain. 
+ * Any options of the packet were left in the old packet chain head + */ + mb_opts->m_next = mb_copy; + mb_opts->m_data += 16; + mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; + /* + * Copy the base ip header from the mb_copy chain to the new head mbuf + */ + bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN); + /* + * Add the NOP and LSRR after the base ip header + */ + cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; + *cp++ = IPOPT_NOP; + *cp++ = IPOPT_LSRR; + *cp++ = 11; /* LSRR option length */ + *cp++ = 8; /* LSSR pointer to second element */ + *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ + cp += 4; + *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ - error = ip_output(mb_opts, NULL, NULL, IP_FORWARDING, NULL); + if (vifp->v_rate_limit <= 0) + tbf_send_packet(vifp, mb_opts, 0); + else + tbf_control(vifp, mb_opts, + mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); } + +static void +encap_send(ip, vifp, m) + register struct ip *ip; + register struct vif *vifp; + register struct mbuf *m; +{ + register struct mbuf *mb_copy; + register struct ip *ip_copy; + register int i, len = ip->ip_len; + + /* + * copy the old packet & pullup it's IP header into the + * new mbuf so we can modify it. Try to fill the new + * mbuf since if we don't the ethernet driver will. + */ + MGET(mb_copy, M_DONTWAIT, MT_DATA); + if (mb_copy == NULL) + return; + mb_copy->m_data += 16; + mb_copy->m_len = sizeof(multicast_encap_iphdr); + + if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { + m_freem(mb_copy); + return; + } + i = MHLEN - M_LEADINGSPACE(mb_copy); + if (i > len) + i = len; + mb_copy = m_pullup(mb_copy, i); + if (mb_copy == NULL) + return; + + /* + * fill in the encapsulating IP header. 
+ */ + ip_copy = mtod(mb_copy, struct ip *); + *ip_copy = multicast_encap_iphdr; + ip_copy->ip_id = htons(ip_id++); + ip_copy->ip_len += len; + ip_copy->ip_src = vifp->v_lcl_addr; + ip_copy->ip_dst = vifp->v_rmt_addr; + + /* + * turn the encapsulated IP header back into a valid one. + */ + ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); + --ip->ip_ttl; + HTONS(ip->ip_len); + HTONS(ip->ip_off); + ip->ip_sum = 0; +#if defined(LBL) && !defined(ultrix) + ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); +#else + mb_copy->m_data += sizeof(multicast_encap_iphdr); + ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); + mb_copy->m_data -= sizeof(multicast_encap_iphdr); #endif + + if (vifp->v_rate_limit <= 0) + tbf_send_packet(vifp, mb_copy, 0); + else + tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); +} + +/* + * De-encapsulate a packet and feed it back through ip input (this + * routine is called whenever IP gets a packet with proto type + * ENCAP_PROTO and a local destination address). + */ +void +multiencap_decap(m) + register struct mbuf *m; +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + register struct ip *ip = mtod(m, struct ip *); + register int hlen = ip->ip_hl << 2; + register int s; + register struct ifqueue *ifq; + register struct vif *vifp; + + if (ip->ip_p != ENCAP_PROTO) { + rip_input(m); + return; + } + /* + * dump the packet if it's not to a multicast destination or if + * we don't have an encapsulating tunnel with the source. + * Note: This code assumes that the remote site IP address + * uniquely identifies the tunnel (i.e., that this site has + * at most one tunnel with the remote site). + */ + if (! 
IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { + ++mrtstat.mrts_bad_tunnel; + m_freem(m); + return; + } + if (ip->ip_src.s_addr != last_encap_src) { + register struct vif *vife; + + vifp = viftable; + vife = vifp + numvifs; + last_encap_src = ip->ip_src.s_addr; + last_encap_vif = 0; + for ( ; vifp < vife; ++vifp) + if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { + if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) + == VIFF_TUNNEL) + last_encap_vif = vifp; + break; + } + } + if ((vifp = last_encap_vif) == 0) { + last_encap_src = 0; + mrtstat.mrts_cant_tunnel++; /*XXX*/ + m_freem(m); + if (mrtdebug) + log(LOG_DEBUG, "ip_mforward: no tunnel with %u", + ntohl(ip->ip_src.s_addr)); + return; + } + ifp = vifp->v_ifp; + hlen -= sizeof(struct ifnet *); + m->m_data += hlen; + m->m_len -= hlen; + *(mtod(m, struct ifnet **)) = ifp; + ifq = &ipintrq; + s = splimp(); + if (IF_QFULL(ifq)) { + IF_DROP(ifq); + m_freem(m); + } else { + IF_ENQUEUE(ifq, m); + /* + * normally we would need a "schednetisr(NETISR_IP)" + * here but we were called by ip_input and it is going + * to loop back & try to dequeue the packet we just + * queued as soon as we return so we avoid the + * unnecessary software interrrupt. 
+ */ + } + splx(s); +} + +/* + * Token bucket filter module + */ +void +tbf_control(vifp, m, ip, p_len, imo) + register struct vif *vifp; + register struct mbuf *m; + register struct ip *ip; + register u_long p_len; + struct ip_moptions *imo; +{ + tbf_update_tokens(vifp); + + /* if there are enough tokens, + * and the queue is empty, + * send this packet out + */ + + if (vifp->v_tbf->q_len == 0) { + if (p_len <= vifp->v_tbf->n_tok) { + vifp->v_tbf->n_tok -= p_len; + tbf_send_packet(vifp, m, imo); + } else if (p_len > MAX_BKT_SIZE) { + /* drop if packet is too large */ + mrtstat.mrts_pkt2large++; + m_freem(m); + return; + } else { + /* queue packet and timeout till later */ + tbf_queue(vifp, m, ip, imo); + timeout(tbf_reprocess_q, (caddr_t)vifp, 1); + } + } else if (vifp->v_tbf->q_len < MAXQSIZE) { + /* finite queue length, so queue pkts and process queue */ + tbf_queue(vifp, m, ip, imo); + tbf_process_q(vifp); + } else { + /* queue length too much, try to dq and queue and process */ + if (!tbf_dq_sel(vifp, ip)) { + mrtstat.mrts_q_overflow++; + m_freem(m); + return; + } else { + tbf_queue(vifp, m, ip, imo); + tbf_process_q(vifp); + } + } + return; +} + +/* + * adds a packet to the queue at the interface + */ +void +tbf_queue(vifp, m, ip, imo) + register struct vif *vifp; + register struct mbuf *m; + register struct ip *ip; + struct ip_moptions *imo; +{ + register u_long ql; + register int index = (vifp - viftable); + register int s = splnet(); + + ql = vifp->v_tbf->q_len; + + qtable[index][ql].pkt_m = m; + qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; + qtable[index][ql].pkt_ip = ip; + qtable[index][ql].pkt_imo = imo; + + vifp->v_tbf->q_len++; + splx(s); +} + + +/* + * processes the queue at the interface + */ +void +tbf_process_q(vifp) + register struct vif *vifp; +{ + register struct mbuf *m; + register struct pkt_queue pkt_1; + register int index = (vifp - viftable); + register int s = splnet(); + + /* loop through the queue at the interface and 
send as many packets + * as possible + */ + while (vifp->v_tbf->q_len > 0) { + /* locate the first packet */ + pkt_1.pkt_len = ((qtable[index][0]).pkt_len); + pkt_1.pkt_m = (qtable[index][0]).pkt_m; + pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; + pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; + + /* determine if the packet can be sent */ + if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { + /* if so, + * reduce no of tokens, dequeue the queue, + * send the packet. + */ + vifp->v_tbf->n_tok -= pkt_1.pkt_len; + + tbf_dequeue(vifp, 0); + + tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); + + } else break; + } + splx(s); +} + +/* + * removes the jth packet from the queue at the interface + */ +void +tbf_dequeue(vifp,j) + register struct vif *vifp; + register int j; +{ + register u_long index = vifp - viftable; + register int i; + + for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { + qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; + qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; + qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; + qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; + } + qtable[index][i-1].pkt_m = NULL; + qtable[index][i-1].pkt_len = NULL; + qtable[index][i-1].pkt_ip = NULL; + qtable[index][i-1].pkt_imo = NULL; + + vifp->v_tbf->q_len--; + + if (tbfdebug > 1) + log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); +} + +void +tbf_reprocess_q(xvifp) + void *xvifp; +{ + register struct vif *vifp = xvifp; + if (ip_mrouter == NULL) + return; + + tbf_update_tokens(vifp); + + tbf_process_q(vifp); + + if (vifp->v_tbf->q_len) + timeout(tbf_reprocess_q, (caddr_t)vifp, 1); +} + +/* function that will selectively discard a member of the queue + * based on the precedence value and the priority obtained through + * a lookup table - not yet implemented accurately! 
+ */ +int +tbf_dq_sel(vifp, ip) + register struct vif *vifp; + register struct ip *ip; +{ + register int i; + register int s = splnet(); + register u_int p; + + p = priority(vifp, ip); + + for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { + if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { + m_freem(qtable[vifp-viftable][i].pkt_m); + tbf_dequeue(vifp,i); + splx(s); + mrtstat.mrts_drop_sel++; + return(1); + } + } + splx(s); + return(0); +} + +void +tbf_send_packet(vifp, m, imo) + register struct vif *vifp; + register struct mbuf *m; + struct ip_moptions *imo; +{ + register struct mbuf *mcp; + int error; + int s = splnet(); + + /* if source route tunnels */ + if (vifp->v_flags & VIFF_SRCRT) { + error = ip_output(m, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, imo); + if (mrtdebug > 1) + log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error); + } else if (vifp->v_flags & VIFF_TUNNEL) { + /* If tunnel options */ + ip_output(m, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, imo); + } else { + /* if physical interface option, extract the options and then send */ + error = ip_output(m, (struct mbuf *)0, (struct route *)0, + IP_FORWARDING, imo); + FREE(imo, M_IPMOPTS); + + if (mrtdebug > 1) + log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); + } + splx(s); +} + +/* determine the current time and then + * the elapsed time (between the last time and time now) + * in milliseconds & update the no. 
of tokens in the bucket + */ +void +tbf_update_tokens(vifp) + register struct vif *vifp; +{ + struct timeval tp; + register u_long t; + register u_long elapsed; + register int s = splnet(); + + GET_TIME(tp); + + t = tp.tv_sec*1000 + tp.tv_usec/1000; + + elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; + vifp->v_tbf->n_tok += elapsed; + vifp->v_tbf->last_pkt_t = t; + + if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) + vifp->v_tbf->n_tok = MAX_BKT_SIZE; + + splx(s); +} + +static int +priority(vifp, ip) + register struct vif *vifp; + register struct ip *ip; +{ + register u_long graddr; + register int prio; + + /* temporary hack; will add general packet classifier some day */ + + prio = 50; /* default priority */ + + /* check for source route options and add option length to get dst */ + if (vifp->v_flags & VIFF_SRCRT) + graddr = ntohl((ip+8)->ip_dst.s_addr); + else + graddr = ntohl(ip->ip_dst.s_addr); + + switch (graddr & 0xf) { + case 0x0: break; + case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ + break; + case 0x2: break; + case 0x3: break; + case 0x4: break; + case 0x5: break; + case 0x6: break; + case 0x7: break; + case 0x8: break; + case 0x9: break; + case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ + break; + case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ + break; + case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ + break; + case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ + break; + case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ + break; + case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ + break; + } + + if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio); + + return prio; +} + +/* + * End of token bucket filter modifications + */ +#endif + + diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h index 6f0382a08c7b..304b3ceebf1f 100644 --- a/sys/netinet/ip_mroute.h +++ b/sys/netinet/ip_mroute.h @@ -35,7 
+35,7 @@ * SUCH DAMAGE. * * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93 - * $Id: ip_mroute.h,v 1.2 1994/08/02 07:48:42 davidg Exp $ + * $Id: ip_mroute.h,v 1.3 1994/08/21 05:27:32 paul Exp $ */ #ifndef _NETINET_IP_MROUTE_H_ @@ -48,23 +48,24 @@ * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. + * Modified by Ajit Thyagarajan, PARC, August 1993. + * Modified by Ajit Thyagarajan, PARC, August 1994. * - * MROUTING 1.0 + * MROUTING 1.5 */ /* * DVMRP-specific setsockopt commands. */ -#define DVMRP_INIT 100 -#define DVMRP_DONE 101 -#define DVMRP_ADD_VIF 102 -#define DVMRP_DEL_VIF 103 -#define DVMRP_ADD_LGRP 104 -#define DVMRP_DEL_LGRP 105 -#define DVMRP_ADD_MRT 106 -#define DVMRP_DEL_MRT 107 +#define DVMRP_INIT 100 /* initialize forwarder */ +#define DVMRP_DONE 101 /* shut down forwarder */ +#define DVMRP_ADD_VIF 102 /* create virtual interface */ +#define DVMRP_DEL_VIF 103 /* delete virtual interface */ +#define DVMRP_ADD_MFC 104 /* insert forwarding cache entry */ +#define DVMRP_DEL_MFC 105 /* delete forwarding cache entry */ +#define GET_TIME(t) microtime(&t) /* * Types and macros for handling bitmaps with one bit per virtual interface. @@ -82,97 +83,172 @@ typedef u_short vifi_t; /* type of a vif index */ /* - * Agument structure for DVMRP_ADD_VIF. + * Argument structure for DVMRP_ADD_VIF. * (DVMRP_DEL_VIF takes a single vifi_t argument.) */ struct vifctl { vifi_t vifc_vifi; /* the index of the vif to be added */ u_char vifc_flags; /* VIFF_ flags defined below */ u_char vifc_threshold; /* min ttl required to forward on vif */ + u_int vifc_rate_limit; /* max tate */ struct in_addr vifc_lcl_addr; /* local interface address */ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */ }; #define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */ - +#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ /* - * Argument structure for DVMRP_ADD_LGRP and DVMRP_DEL_LGRP. 
+ * Argument structure for DVMRP_ADD_MFC + * (mfcc_tos to be added at a future point) */ -struct lgrplctl { - vifi_t lgc_vifi; - struct in_addr lgc_gaddr; -}; - - -/* - * Argument structure for DVMRP_ADD_MRT. - * (DVMRP_DEL_MRT takes a single struct in_addr argument, containing origin.) - */ -struct mrtctl { - struct in_addr mrtc_origin; /* subnet origin of multicasts */ - struct in_addr mrtc_originmask; /* subnet mask for origin */ - vifi_t mrtc_parent; /* incoming vif */ - vifbitmap_t mrtc_children; /* outgoing children vifs */ - vifbitmap_t mrtc_leaves; /* subset of outgoing children vifs */ -}; - - -#ifdef KERNEL - -/* - * The kernel's virtual-interface structure. - */ -struct vif { - u_char v_flags; /* VIFF_ flags defined above */ - u_char v_threshold; /* min ttl required to forward on vif */ - struct in_addr v_lcl_addr; /* local interface address */ - struct in_addr v_rmt_addr; /* remote address (tunnels only) */ - struct ifnet *v_ifp; /* pointer to interface */ - struct in_addr *v_lcl_grps; /* list of local grps (phyints only) */ - int v_lcl_grps_max; /* malloc'ed number of v_lcl_grps */ - int v_lcl_grps_n; /* used number of v_lcl_grps */ - u_long v_cached_group; /* last grp looked-up (phyints only) */ - int v_cached_result; /* last look-up result (phyints only) */ +struct mfcctl { + struct in_addr mfcc_origin; /* subnet origin of mcasts */ + struct in_addr mfcc_mcastgrp; /* multicast group associated*/ + struct in_addr mfcc_originmask; /* subnet mask for origin */ + vifi_t mfcc_parent; /* incoming vif */ + u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ }; /* - * The kernel's multicast route structure. 
+ * Argument structure for DVMRP_DEL_MFC */ -struct mrt { - struct in_addr mrt_origin; /* subnet origin of multicasts */ - struct in_addr mrt_originmask; /* subnet mask for origin */ - vifi_t mrt_parent; /* incoming vif */ - vifbitmap_t mrt_children; /* outgoing children vifs */ - vifbitmap_t mrt_leaves; /* subset of outgoing children vifs */ - struct mrt *mrt_next; /* forward link */ +struct delmfcctl { + struct in_addr mfcc_origin; /* subnet origin of multicasts */ + struct in_addr mfcc_mcastgrp; /* multicast group assoc. w/ origin */ }; +/* + * Argument structure used by RSVP daemon to get vif information + */ +struct vif_req { + u_char v_flags; /* VIFF_ flags defined above */ + u_char v_threshold; /* min ttl required to forward on vif */ + struct in_addr v_lcl_addr; /* local interface address */ + struct in_addr v_rmt_addr; + char v_if_name[IFNAMSIZ]; /* if name */ +}; -#define MRTHASHSIZ 64 -#if (MRTHASHSIZ & (MRTHASHSIZ - 1)) == 0 /* from sys:route.h */ -#define MRTHASHMOD(h) ((h) & (MRTHASHSIZ - 1)) -#else -#define MRTHASHMOD(h) ((h) % MRTHASHSIZ) -#endif +struct vif_conf { + u_int vifc_len; + u_int vifc_num; + struct vif_req *vifc_req; +}; /* * The kernel's multicast routing statistics. */ struct mrtstat { - u_long mrts_mrt_lookups; /* # multicast route lookups */ - u_long mrts_mrt_misses; /* # multicast route cache misses */ - u_long mrts_grp_lookups; /* # group address lookups */ - u_long mrts_grp_misses; /* # group address cache misses */ - u_long mrts_no_route; /* no route for packet's origin */ - u_long mrts_bad_tunnel; /* malformed tunnel options */ - u_long mrts_cant_tunnel; /* no room for tunnel options */ + u_long mrts_mfc_lookups; /* # forw. cache hash table hits */ + u_long mrts_mfc_misses; /* # forw. 
cache hash table misses */ + u_long mrts_upcalls; /* # calls to mrouted */ + u_long mrts_no_route; /* no route for packet's origin */ + u_long mrts_bad_tunnel; /* malformed tunnel options */ + u_long mrts_cant_tunnel; /* no room for tunnel options */ + u_long mrts_wrong_if; /* arrived on wrong interface */ + u_long mrts_upq_ovflw; /* upcall Q overflow */ + u_long mrts_cache_cleanups; /* # entries with no upcalls */ + u_long mrts_drop_sel; /* pkts dropped selectively */ + u_long mrts_q_overflow; /* pkts dropped - Q overflow */ + u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */ }; +/* + * Argument structure used by mrouted to get src-grp pkt counts + */ +struct sioc_sg_req { + struct in_addr src; + struct in_addr grp; + u_long count; +}; + +/* + * Argument structure used by mrouted to get vif pkt counts + */ +struct sioc_vif_req { + vifi_t vifi; + u_long icount; + u_long ocount; +}; + + +#ifdef KERNEL + +struct vif { + u_char v_flags; /* VIFF_ flags defined above */ + u_char v_threshold; /* min ttl required to forward on vif*/ + u_int v_rate_limit; /* max rate */ + struct tbf *v_tbf; /* token bucket structure at intf. */ + struct in_addr v_lcl_addr; /* local interface address */ + struct in_addr v_rmt_addr; /* remote address (tunnels only) */ + struct ifnet *v_ifp; /* pointer to interface */ + u_long v_pkt_in; /* # pkts in on interface */ + u_long v_pkt_out; /* # pkts out on interface */ +}; + +/* + * The kernel's multicast forwarding cache entry structure + * (A field for the type of service (mfc_tos) is to be added + * at a future point) + */ +struct mfc { + struct in_addr mfc_origin; /* subnet origin of mcasts */ + struct in_addr mfc_mcastgrp; /* multicast group associated*/ + struct in_addr mfc_originmask; /* subnet mask for origin */ + vifi_t mfc_parent; /* incoming vif */ + u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ + u_long mfc_pkt_cnt; /* pkt count for src-grp */ +}; + +/* + * Argument structure used for pkt info. 
while upcall is made + */ +struct rtdetq { + struct mbuf *m; + struct ifnet *ifp; + u_long tunnel_src; + struct ip_moptions *imo; +}; + +#define MFCTBLSIZ 256 +#if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */ +#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1)) +#else +#define MFCHASHMOD(h) ((h) % MFCTBLSIZ) +#endif + +#define MAX_UPQ 4 /* max. no of pkts in upcall Q */ + +/* + * Token Bucket filter code + */ +#define MAX_BKT_SIZE 10000 /* 10K bytes size */ +#define MAXQSIZE 10 /* max # of pkts in queue */ + +/* + * queue structure at each vif + */ +struct pkt_queue +{ + u_long pkt_len; /* length of packet in queue */ + struct mbuf *pkt_m; /* pointer to packet mbuf */ + struct ip *pkt_ip; /* pointer to ip header */ + struct ip_moptions *pkt_imo; /* IP multicast options assoc. with pkt */ +}; + +/* + * the token bucket filter at each vif + */ +struct tbf +{ + u_long last_pkt_t; /* arr. time of last pkt */ + u_long n_tok; /* no of tokens in bucket */ + u_long q_len; /* length of queue at this vif */ +}; int ip_mrouter_cmd __P((int, struct socket *, struct mbuf *)); int ip_mrouter_done __P((void)); #endif /* KERNEL */ -#endif +#endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 994cc980f5cc..57ec677656d8 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 - * $Id: ip_output.c,v 1.4 1994/08/02 07:48:45 davidg Exp $ + * $Id: ip_output.c,v 1.5 1994/08/18 22:35:31 wollman Exp $ */ #include @@ -225,9 +225,16 @@ ip_output(m0, opt, ro, flags, imo) * above, will be forwarded by the ip_input() routine, * if necessary. */ - extern struct socket *ip_mrouter; if (ip_mrouter && (flags & IP_FORWARDING) == 0) { - if (ip_mforward(m, ifp) != 0) { + /* + * Check if rsvp daemon is running. If not, don't + * set ip_moptions. This ensures that the packet + * is multicast and not just sent down one link + * as prescribed by rsvpd. 
+ */ + if (ip_rsvpd == NULL) + imo = NULL; + if (ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } @@ -557,6 +564,7 @@ ip_ctloutput(op, so, level, optname, mp) #undef OPTSET case IP_MULTICAST_IF: + case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: @@ -620,6 +628,7 @@ ip_ctloutput(op, so, level, optname, mp) break; case IP_MULTICAST_IF: + case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: @@ -774,6 +783,7 @@ ip_setmoptions(optname, imop, m) return (ENOBUFS); *imop = imo; imo->imo_multicast_ifp = NULL; + imo->imo_multicast_vif = 0; imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; imo->imo_num_memberships = 0; @@ -781,6 +791,20 @@ ip_setmoptions(optname, imop, m) switch (optname) { + /* store an index number for the vif you wanna use in the send */ + case IP_MULTICAST_VIF: + if (m == NULL || m->m_len != sizeof(int)) { + error = EINVAL; + break; + } + i = *(mtod(m, int *)); + if (!legal_vif_num(i)) { + error = EINVAL; + break; + } + imo->imo_multicast_vif = i; + break; + case IP_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. @@ -972,6 +996,7 @@ ip_setmoptions(optname, imop, m) * If all options have default values, no need to keep the mbuf. 
*/ if (imo->imo_multicast_ifp == NULL && + imo->imo_multicast_vif == 0 && imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { @@ -1000,6 +1025,14 @@ ip_getmoptions(optname, imo, mp) switch (optname) { + case IP_MULTICAST_VIF: + if (imo != NULL) + *(mtod(*mp, int *)) = imo->imo_multicast_vif; + else + *(mtod(*mp, int *)) = 7890; + (*mp)->m_len = sizeof(int); + return(0); + case IP_MULTICAST_IF: addr = mtod(*mp, struct in_addr *); (*mp)->m_len = sizeof(struct in_addr); diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 6c83c4ff417d..3e8db9640335 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 - * $Id: ip_var.h,v 1.3 1994/08/18 22:35:31 wollman Exp $ + * $Id: ip_var.h,v 1.4 1994/08/21 05:27:33 paul Exp $ */ #ifndef _NETINET_IP_VAR_H_ @@ -111,6 +111,7 @@ struct ipoption { */ struct ip_moptions { struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ + u_long imo_multicast_vif; /* vif num outgoing multicasts */ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short imo_num_memberships; /* no. memberships this socket */ @@ -155,6 +156,8 @@ extern struct ipstat ipstat; extern struct ipq ipq; /* ip reass. 
queue */ extern u_short ip_id; /* ip packet ctr, for ids */ extern int ip_defttl; /* default IP ttl */ +extern struct socket *ip_rsvpd; /* reservation protocol daemon */ +extern struct socket *ip_mrouter; /* multicast routing daemon */ int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); void ip_deq __P((struct ipasfrag *)); @@ -166,7 +169,8 @@ void ip_freef __P((struct ipq *)); void ip_freemoptions __P((struct ip_moptions *)); int ip_getmoptions __P((int, struct ip_moptions *, struct mbuf **)); void ip_init __P((void)); -int ip_mforward __P((struct mbuf *, struct ifnet *)); +int ip_mforward __P((struct ip *, struct ifnet *, struct mbuf *, + struct ip_moptions *)); int ip_optcopy __P((struct ip *, struct ip *)); int ip_output __P((struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *)); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 86c2b5a618e7..e0cddca4fe42 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 - * $Id$ + * $Id: raw_ip.c,v 1.2 1994/08/02 07:48:49 davidg Exp $ */ #include @@ -203,14 +203,20 @@ rip_ctloutput(op, so, level, optname, m) } break; + case IP_RSVP_ON: + error = ip_rsvp_init(so); + break; + + case IP_RSVP_OFF: + error = ip_rsvp_done(); + break; + case DVMRP_INIT: case DVMRP_DONE: case DVMRP_ADD_VIF: case DVMRP_DEL_VIF: - case DVMRP_ADD_LGRP: - case DVMRP_DEL_LGRP: - case DVMRP_ADD_MRT: - case DVMRP_DEL_MRT: + case DVMRP_ADD_MFC: + case DVMRP_DEL_MFC: #ifdef MROUTING if (op == PRCO_SETOPT) { error = ip_mrouter_cmd(optname, so, *m); @@ -240,9 +246,6 @@ rip_usrreq(so, req, m, nam, control) { register int error = 0; register struct inpcb *inp = sotoinpcb(so); -#ifdef MROUTING - extern struct socket *ip_mrouter; -#endif switch (req) { case PRU_ATTACH: diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index b253044fc0d1..d6569de704fb 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)sockio.h 8.1 (Berkeley) 3/28/94 - * $Id: sockio.h,v 1.2 1994/08/02 07:53:37 davidg Exp $ + * $Id: sockio.h,v 1.3 1994/08/08 10:49:26 davidg Exp $ */ #ifndef _SYS_SOCKIO_H_ @@ -50,6 +50,11 @@ #define SIOCADDRT _IOW('r', 10, struct ortentry) /* add route */ #define SIOCDELRT _IOW('r', 11, struct ortentry) /* delete route */ +#define SIOCSETRTINFO _IOWR('r', 12, struct fullrtentry) /* change aux info */ +#define SIOCGETRTINFO _IOWR('r', 13, struct fullrtentry) /* read aux info */ +#define SIOCGETVIFINF _IOWR('r', 14, struct vif_conf) /* read m/c vifs */ +#define SIOCGETVIFCNT _IOWR('r', 15, struct sioc_vif_req)/* get vif pkt cnt */ +#define SIOCGETSGCNT _IOWR('r', 16, struct sioc_sg_req) /* get s,g pkt cnt */ #define SIOCSIFADDR _IOW('i', 12, struct ifreq) /* set ifnet address */ #define OSIOCGIFADDR _IOWR('i', 13, struct ifreq) /* get ifnet address */