diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c index a0fb071d6f16..3444c97b7c39 100644 --- a/sys/net/if_llatbl.c +++ b/sys/net/if_llatbl.c @@ -100,18 +100,34 @@ done: * This function is called by the timer functions * such as arptimer() and nd6_llinfo_timer(), and * the caller does the locking. + * + * Returns the number of held packets, if any, that were dropped. */ -void +size_t llentry_free(struct llentry *lle) { - + size_t pkts_dropped; + struct mbuf *next; + + pkts_dropped = 0; LLE_WLOCK_ASSERT(lle); LIST_REMOVE(lle, lle_next); - if (lle->la_hold != NULL) + while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) { + next = lle->la_hold->m_nextpkt; m_freem(lle->la_hold); + lle->la_hold = next; + lle->la_numheld--; + pkts_dropped++; + } + + KASSERT(lle->la_numheld == 0, + ("%s: la_numheld %d > 0, pkts_droped %ld", __func__, + lle->la_numheld, pkts_dropped)); LLE_FREE_LOCKED(lle); + + return (pkts_dropped); } /* @@ -412,6 +428,7 @@ llatbl_lle_show(struct llentry_sa *la) db_printf(" lle_tbl=%p\n", lle->lle_tbl); db_printf(" lle_head=%p\n", lle->lle_head); db_printf(" la_hold=%p\n", lle->la_hold); + db_printf(" la_numheld=%d\n", lle->la_numheld); db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire); db_printf(" la_flags=0x%04x\n", lle->la_flags); db_printf(" la_asked=%u\n", lle->la_asked); diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h index a4d02ab07906..6c5ad188b2c8 100644 --- a/sys/net/if_llatbl.h +++ b/sys/net/if_llatbl.h @@ -58,6 +58,7 @@ struct llentry { struct lltable *lle_tbl; struct llentries *lle_head; struct mbuf *la_hold; + int la_numheld; /* # of packets currently held */ time_t la_expire; uint16_t la_flags; uint16_t la_asked; @@ -191,7 +192,7 @@ void lltable_drain(int); #endif int lltable_sysctl_dumparp(int, struct sysctl_req *); -void llentry_free(struct llentry *); +size_t llentry_free(struct llentry *); int llentry_update(struct llentry **, struct lltable *, struct sockaddr_storage *, struct ifnet *); diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 6d383886f054..ef59cdb30ae5 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -89,13 +89,16 @@ VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for static VNET_DEFINE(int, arp_proxyall) = 0; static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for * 20 seconds */ -static VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ + +static VNET_DEFINE(int, arp_maxhold) = 1; #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) #define V_arpstat VNET(arpstat) +#define V_arp_maxhold VNET(arp_maxhold) SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, @@ -109,9 +112,15 @@ SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &VNET_NAME(arp_proxyall), 0, "Enable proxy ARP for all suitable requests"); +SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW, + &VNET_NAME(arpt_down), 0, + "Incomplete ARP entry lifetime in seconds"); SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW, &VNET_NAME(arpstat), arpstat, "ARP statistics (struct arpstat, net/if_arp.h)"); +SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW, + &VNET_NAME(arp_maxhold), 0, + "Number of packets to hold per ARP entry"); static void arp_init(void); void arprequest(struct ifnet *, @@ -162,6 +171,7 @@ arptimer(void *arg) { struct ifnet *ifp; struct llentry *lle; + int pkts_dropped; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); lle = (struct llentry *)arg; @@ -176,7 +186,8 @@ arptimer(void *arg) callout_active(&lle->la_timer)) { callout_stop(&lle->la_timer); LLE_REMREF(lle); - (void) llentry_free(lle); + pkts_dropped = llentry_free(lle); + ARPSTAT_ADD(dropped, pkts_dropped); ARPSTAT_INC(timeouts); } else { #ifdef DIAGNOSTIC @@ -275,6 +286,8 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, { struct llentry *la = 0; u_int flags = 0; + struct mbuf *curr = NULL; + struct mbuf *next = NULL; int error, renew; *lle = NULL; @@ -348,15 +361,28 @@ retry: } /* * There is an arptab entry, but no ethernet address - * response yet. Replace the held mbuf with this - * latest one. + * response yet. Add the mbuf to the list, dropping + * the oldest packet if we have exceeded the system + * setting. */ if (m != NULL) { + if (la->la_numheld >= V_arp_maxhold) { + if (la->la_hold != NULL) { + next = la->la_hold->m_nextpkt; + m_freem(la->la_hold); + la->la_hold = next; + la->la_numheld--; + ARPSTAT_INC(dropped); + } + } if (la->la_hold != NULL) { - m_freem(la->la_hold); - ARPSTAT_INC(dropped); - } - la->la_hold = m; + curr = la->la_hold; + while (curr->m_nextpkt != NULL) + curr = curr->m_nextpkt; + curr->m_nextpkt = m; + } else + la->la_hold = m; + la->la_numheld++; if (renew == 0 && (flags & LLE_EXCLUSIVE)) { flags &= ~LLE_EXCLUSIVE; LLE_DOWNGRADE(la); @@ -483,7 +509,6 @@ in_arpinput(struct mbuf *m) struct rtentry *rt; struct ifaddr *ifa; struct in_ifaddr *ia; - struct mbuf *hold; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; u_int8_t *enaddr = NULL; @@ -698,15 +723,29 @@ match: } la->la_asked = 0; la->la_preempt = V_arp_maxtries; - hold = la->la_hold; - if (hold != NULL) { - la->la_hold = NULL; + /* + * The packets are all freed within the call to the output + * routine. + * + * NB: The lock MUST be released before the call to the + * output routine. + */ + if (la->la_hold != NULL) { + struct mbuf *m_hold, *m_hold_next; + memcpy(&sa, L3_ADDR(la), sizeof(sa)); - } - LLE_WUNLOCK(la); - if (hold != NULL) - (*ifp->if_output)(ifp, hold, &sa, NULL); - } + LLE_WUNLOCK(la); + for (m_hold = la->la_hold, la->la_hold = NULL; + m_hold != NULL; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + (*ifp->if_output)(ifp, m_hold, &sa, NULL); + } + } else + LLE_WUNLOCK(la); + la->la_hold = NULL; + la->la_numheld = 0; + } /* end of FIB loop */ reply: if (op != ARPOP_REQUEST) goto drop; diff --git a/sys/netinet/in.c b/sys/netinet/in.c index b2ae8d94f54b..2ec54e29ede2 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -89,6 +90,9 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW, VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) +VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +#define V_arpstat VNET(arpstat) + /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). If subnetsarelocal @@ -1363,6 +1367,7 @@ in_lltable_prefix_free(struct lltable *llt, const struct sockaddr_in *msk = (const struct sockaddr_in *)mask; struct llentry *lle, *next; register int i; + size_t pkts_dropped; for (i=0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { @@ -1375,7 +1380,8 @@ in_lltable_prefix_free(struct lltable *llt, LLE_WLOCK(lle); if (canceled) LLE_REMREF(lle); - llentry_free(lle); + pkts_dropped = llentry_free(lle); + ARPSTAT_ADD(dropped, pkts_dropped); } } } diff --git a/tools/regression/netinet/arphold/Makefile b/tools/regression/netinet/arphold/Makefile new file mode 100644 index 000000000000..37abefb57ddc --- /dev/null +++ b/tools/regression/netinet/arphold/Makefile @@ -0,0 +1,7 @@ +# $FreeBSD$ + +PROG= arphold +NO_MAN= +CFLAGS+= -Wall + +.include diff --git a/tools/regression/netinet/arphold/arphold.c b/tools/regression/netinet/arphold/arphold.c new file mode 100644 index 000000000000..092461ddb47c --- /dev/null +++ b/tools/regression/netinet/arphold/arphold.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2010 Advanced Computing Technologies LLC + * Written by George Neville-Neil gnn@freebsd.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Description: The following is a test of the arp entry packet queues + * which replaced the single packet hold entry that existed in the BSDs + * since time immemorial. The test process is: + * + * 1) Find out the current system limit (maxhold) + * 2) Using an IP address for which we do not yet have an entry + * load up an ARP entry packet queue with exactly that many packets. + * 3) Check the arp dropped stat to make sure that we have not dropped + * any packets as yet. + * 4) Add one more packet to the queue. + * 5) Make sure that only one packet was dropped. + * + * CAVEAT: The ARP timer will flush the queue after 1 second so it is + * important not to run this code in a fast loop or the test will + * fail. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MSG_SIZE 1024 +#define PORT 6969 + +int +main(int argc, char **argv) +{ + + int sock; + int maxhold; + int wait; + size_t size = sizeof(maxhold); + struct sockaddr_in dest; + char message[MSG_SIZE]; + struct arpstat arpstat; + size_t len = sizeof(arpstat); + unsigned long dropped = 0; + + memset(&message, 1, sizeof(message)); + + if (sysctlbyname("net.link.ether.inet.maxhold", &maxhold, &size, + NULL, 0) < 0) { + perror("not ok 1 - sysctlbyname failed"); + exit(1); + } + +#ifdef DEBUG + printf("maxhold is %d\n", maxhold); +#endif /* DEBUG */ + + if ((sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) { + perror("not ok 1 - could not open socket"); + exit(1); + } + + bzero(&dest, sizeof(dest)); + if (inet_pton(AF_INET, argv[1], &dest.sin_addr.s_addr) != 1) { + perror("not ok 1 - could not parse address"); + exit(1); + } + dest.sin_len = sizeof(dest); + dest.sin_family = AF_INET; + dest.sin_port = htons(PORT); + + if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len, + NULL, 0) < 0) { + perror("not ok 1 - could not get initial arp stats"); + exit(1); + } + + dropped = arpstat.dropped; +#ifdef DEBUG + printf("dropped before %ld\n", dropped); +#endif /* DEBUG */ + + /* + * Load up the queue in the ARP entry to the maximum. + * We should not drop any packets at this point. + */ + + while (maxhold > 0) { + if (sendto(sock, message, sizeof(message), 0, + (struct sockaddr *)&dest, sizeof(dest)) < 0) { + perror("not ok 1 - could not send packet"); + exit(1); + } + maxhold--; + } + + if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len, + NULL, 0) < 0) { + perror("not ok 1 - could not get new arp stats"); + exit(1); + } + +#ifdef DEBUG + printf("dropped after %ld\n", arpstat.dropped); +#endif /* DEBUG */ + + if (arpstat.dropped != dropped) { + printf("not ok 1 - Failed, drops changed:" + "before %ld after %ld\n", dropped, arpstat.dropped); + exit(1); + } + + dropped = arpstat.dropped; + + /* Now add one extra and make sure it is dropped. */ + if (sendto(sock, message, sizeof(message), 0, + (struct sockaddr *)&dest, sizeof(dest)) < 0) { + perror("not ok 1 - could not send packet"); + exit(1); + } + + if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len, + NULL, 0) < 0) { + perror("not ok 1 - could not get new arp stats"); + exit(1); + } + + if (arpstat.dropped != (dropped + 1)) { + printf("not ok 1 - Failed to drop one packet: before" + " %ld after %ld\n", dropped, arpstat.dropped); + exit(1); + } + + printf("ok\n"); + return (0); +} diff --git a/tools/regression/netinet/arphold/arphold.t b/tools/regression/netinet/arphold/arphold.t new file mode 100644 index 000000000000..eb1b2a8c5ad3 --- /dev/null +++ b/tools/regression/netinet/arphold/arphold.t @@ -0,0 +1,7 @@ +#!/bin/sh +# +# $FreeBSD$ + +make arphold 2>&1 > /dev/null + +./arphold 192.168.1.222