src/sys/net/if_pfsync.c

3364 lines
75 KiB
C

/* $OpenBSD: if_pfsync.c,v 1.325 2024/02/13 12:22:09 bluhm Exp $ */
/*
* Copyright (c) 2002 Michael Shalayeff
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 2009, 2022, 2023 David Gwynne <dlg@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "bpfilter.h"
#include "pfsync.h"
#include "kstat.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/tree.h>
#include <sys/smr.h>
#include <sys/percpu.h>
#include <sys/refcnt.h>
#include <sys/kstat.h>
#include <sys/stdarg.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */
#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>
#define PFSYNC_MINPKT ( \
sizeof(struct ip) + \
sizeof(struct pfsync_header))
struct pfsync_softc;
struct pfsync_deferral {
TAILQ_ENTRY(pfsync_deferral) pd_entry;
struct pf_state *pd_st;
struct mbuf *pd_m;
uint64_t pd_deadline;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
#define PFSYNC_DEFER_NSEC 20000000ULL
#define PFSYNC_DEFER_LIMIT 128
#define PFSYNC_BULK_SND_IVAL_MS 20
static struct pool pfsync_deferrals_pool;
enum pfsync_bulk_req_state {
PFSYNC_BREQ_S_NONE,
PFSYNC_BREQ_S_START,
PFSYNC_BREQ_S_SENT,
PFSYNC_BREQ_S_BULK,
PFSYNC_BREQ_S_DONE,
};
static const char *pfsync_bulk_req_state_names[] = {
[PFSYNC_BREQ_S_NONE] = "none",
[PFSYNC_BREQ_S_START] = "start",
[PFSYNC_BREQ_S_SENT] = "sent",
[PFSYNC_BREQ_S_BULK] = "bulk",
[PFSYNC_BREQ_S_DONE] = "done",
};
enum pfsync_bulk_req_event {
PFSYNC_BREQ_EVT_UP,
PFSYNC_BREQ_EVT_DOWN,
PFSYNC_BREQ_EVT_TMO,
PFSYNC_BREQ_EVT_LINK,
PFSYNC_BREQ_EVT_BUS_START,
PFSYNC_BREQ_EVT_BUS_END,
};
static const char *pfsync_bulk_req_event_names[] = {
[PFSYNC_BREQ_EVT_UP] = "up",
[PFSYNC_BREQ_EVT_DOWN] = "down",
[PFSYNC_BREQ_EVT_TMO] = "timeout",
[PFSYNC_BREQ_EVT_LINK] = "link",
[PFSYNC_BREQ_EVT_BUS_START] = "bus-start",
[PFSYNC_BREQ_EVT_BUS_END] = "bus-end",
};
struct pfsync_slice {
struct pfsync_softc *s_pfsync;
struct mutex s_mtx;
struct pf_state_queue s_qs[PFSYNC_S_COUNT];
TAILQ_HEAD(, tdb) s_tdb_q;
size_t s_len;
struct mbuf_list s_ml;
struct taskq *s_softnet;
struct task s_task;
struct timeout s_tmo;
struct mbuf_queue s_sendq;
struct task s_send;
struct pfsync_deferrals s_deferrals;
unsigned int s_deferred;
struct task s_deferrals_task;
struct timeout s_deferrals_tmo;
uint64_t s_stat_locks;
uint64_t s_stat_contended;
uint64_t s_stat_write_nop;
uint64_t s_stat_task_add;
uint64_t s_stat_task_run;
uint64_t s_stat_enqueue;
uint64_t s_stat_dequeue;
uint64_t s_stat_defer_add;
uint64_t s_stat_defer_ack;
uint64_t s_stat_defer_run;
uint64_t s_stat_defer_overlimit;
struct kstat *s_kstat;
} __aligned(CACHELINESIZE);
#define PFSYNC_SLICE_BITS 1
#define PFSYNC_NSLICES (1 << PFSYNC_SLICE_BITS)
struct pfsync_softc {
struct ifnet sc_if;
unsigned int sc_dead;
unsigned int sc_up;
struct refcnt sc_refs;
/* config */
struct in_addr sc_syncpeer;
unsigned int sc_maxupdates;
unsigned int sc_defer;
/* operation */
unsigned int sc_sync_ifidx;
unsigned int sc_sync_if_down;
void *sc_inm;
struct task sc_ltask;
struct task sc_dtask;
struct ip sc_template;
struct pfsync_slice sc_slices[PFSYNC_NSLICES];
struct {
struct rwlock req_lock;
struct timeout req_tmo;
enum pfsync_bulk_req_state req_state;
unsigned int req_tries;
unsigned int req_demoted;
} sc_bulk_req;
struct {
struct rwlock snd_lock;
struct timeout snd_tmo;
time_t snd_requested;
struct pf_state *snd_next;
struct pf_state *snd_tail;
unsigned int snd_again;
} sc_bulk_snd;
};
static struct pfsync_softc *pfsyncif = NULL;
static struct cpumem *pfsynccounters;
static inline void
pfsyncstat_inc(enum pfsync_counters c)
{
counters_inc(pfsynccounters, c);
}
static int pfsync_clone_create(struct if_clone *, int);
static int pfsync_clone_destroy(struct ifnet *);
static int pfsync_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
static void pfsync_start(struct ifqueue *);
static int pfsync_ioctl(struct ifnet *, u_long, caddr_t);
static int pfsync_up(struct pfsync_softc *);
static int pfsync_down(struct pfsync_softc *);
static int pfsync_set_mtu(struct pfsync_softc *, unsigned int);
static int pfsync_set_parent(struct pfsync_softc *,
const struct if_parent *);
static int pfsync_get_parent(struct pfsync_softc *, struct if_parent *);
static int pfsync_del_parent(struct pfsync_softc *);
static int pfsync_get_ioc(struct pfsync_softc *, struct ifreq *);
static int pfsync_set_ioc(struct pfsync_softc *, struct ifreq *);
static void pfsync_syncif_link(void *);
static void pfsync_syncif_detach(void *);
static void pfsync_sendout(struct pfsync_softc *, struct mbuf *);
static void pfsync_slice_drop(struct pfsync_softc *, struct pfsync_slice *);
static void pfsync_slice_tmo(void *);
static void pfsync_slice_task(void *);
static void pfsync_slice_sendq(void *);
static void pfsync_deferrals_tmo(void *);
static void pfsync_deferrals_task(void *);
static void pfsync_defer_output(struct pfsync_deferral *);
static void pfsync_bulk_req_evt(struct pfsync_softc *,
enum pfsync_bulk_req_event);
static void pfsync_bulk_req_tmo(void *);
static void pfsync_bulk_snd_tmo(void *);
#if NKSTAT > 0
struct pfsync_kstat_data {
struct kstat_kv pd_locks;
struct kstat_kv pd_contended;
struct kstat_kv pd_write_nop;
struct kstat_kv pd_task_add;
struct kstat_kv pd_task_run;
struct kstat_kv pd_enqueue;
struct kstat_kv pd_dequeue;
struct kstat_kv pd_qdrop;
struct kstat_kv pd_defer_len;
struct kstat_kv pd_defer_add;
struct kstat_kv pd_defer_ack;
struct kstat_kv pd_defer_run;
struct kstat_kv pd_defer_overlimit;
};
static const struct pfsync_kstat_data pfsync_kstat_tpl = {
KSTAT_KV_INITIALIZER("locks", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("contended", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("write-nops", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("send-sched", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("send-run", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("enqueues", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("dequeues", KSTAT_KV_T_COUNTER64),
KSTAT_KV_UNIT_INITIALIZER("qdrops",
KSTAT_KV_T_COUNTER32, KSTAT_KV_U_PACKETS),
KSTAT_KV_UNIT_INITIALIZER("defer-len",
KSTAT_KV_T_COUNTER32, KSTAT_KV_U_PACKETS),
KSTAT_KV_INITIALIZER("defer-add", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("defer-ack", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("defer-run", KSTAT_KV_T_COUNTER64),
KSTAT_KV_INITIALIZER("defer-over", KSTAT_KV_T_COUNTER64),
};
static int
pfsync_kstat_copy(struct kstat *ks, void *dst)
{
struct pfsync_slice *s = ks->ks_softc;
struct pfsync_kstat_data *pd = dst;
*pd = pfsync_kstat_tpl;
kstat_kv_u64(&pd->pd_locks) = s->s_stat_locks;
kstat_kv_u64(&pd->pd_contended) = s->s_stat_contended;
kstat_kv_u64(&pd->pd_write_nop) = s->s_stat_write_nop;
kstat_kv_u64(&pd->pd_task_add) = s->s_stat_task_add;
kstat_kv_u64(&pd->pd_task_run) = s->s_stat_task_run;
kstat_kv_u64(&pd->pd_enqueue) = s->s_stat_enqueue;
kstat_kv_u64(&pd->pd_dequeue) = s->s_stat_dequeue;
kstat_kv_u32(&pd->pd_qdrop) = mq_drops(&s->s_sendq);
kstat_kv_u32(&pd->pd_defer_len) = s->s_deferred;
kstat_kv_u64(&pd->pd_defer_add) = s->s_stat_defer_add;
kstat_kv_u64(&pd->pd_defer_ack) = s->s_stat_defer_ack;
kstat_kv_u64(&pd->pd_defer_run) = s->s_stat_defer_run;
kstat_kv_u64(&pd->pd_defer_overlimit) = s->s_stat_defer_overlimit;
return (0);
}
#endif /* NKSTAT > 0 */
#define PFSYNC_MAX_BULKTRIES 12
struct if_clone pfsync_cloner =
IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
void
pfsyncattach(int npfsync)
{
pfsynccounters = counters_alloc(pfsyncs_ncounters);
if_clone_attach(&pfsync_cloner);
}
static int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
struct pfsync_softc *sc;
struct ifnet *ifp;
size_t i, q;
if (unit != 0)
return (ENXIO);
if (pfsync_deferrals_pool.pr_size == 0) {
pool_init(&pfsync_deferrals_pool,
sizeof(struct pfsync_deferral), 0,
IPL_MPFLOOR, 0, "pfdefer", NULL);
/* pool_cache_init(&pfsync_deferrals_pool); */
}
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
if (sc == NULL)
return (ENOMEM);
/* sc_refs is "owned" by IFF_RUNNING */
sc->sc_syncpeer.s_addr = INADDR_PFSYNC_GROUP;
sc->sc_maxupdates = 128;
sc->sc_defer = 0;
task_set(&sc->sc_ltask, pfsync_syncif_link, sc);
task_set(&sc->sc_dtask, pfsync_syncif_detach, sc);
rw_init(&sc->sc_bulk_req.req_lock, "pfsyncbreq");
/* need process context to take net lock to call ip_output */
timeout_set_proc(&sc->sc_bulk_req.req_tmo, pfsync_bulk_req_tmo, sc);
rw_init(&sc->sc_bulk_snd.snd_lock, "pfsyncbsnd");
/* need process context to take net lock to call ip_output */
timeout_set_proc(&sc->sc_bulk_snd.snd_tmo, pfsync_bulk_snd_tmo, sc);
ifp = &sc->sc_if;
snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d",
ifc->ifc_name, unit);
ifp->if_softc = sc;
ifp->if_ioctl = pfsync_ioctl;
ifp->if_output = pfsync_output;
ifp->if_qstart = pfsync_start;
ifp->if_type = IFT_PFSYNC;
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
for (i = 0; i < nitems(sc->sc_slices); i++) {
struct pfsync_slice *s = &sc->sc_slices[i];
s->s_pfsync = sc;
mtx_init_flags(&s->s_mtx, IPL_SOFTNET, "pfslice", 0);
s->s_softnet = net_tq(i);
timeout_set(&s->s_tmo, pfsync_slice_tmo, s);
task_set(&s->s_task, pfsync_slice_task, s);
mq_init(&s->s_sendq, 16, IPL_SOFTNET);
task_set(&s->s_send, pfsync_slice_sendq, s);
s->s_len = PFSYNC_MINPKT;
ml_init(&s->s_ml);
for (q = 0; q < nitems(s->s_qs); q++)
TAILQ_INIT(&s->s_qs[q]);
TAILQ_INIT(&s->s_tdb_q);
/* stupid NET_LOCK */
timeout_set(&s->s_deferrals_tmo, pfsync_deferrals_tmo, s);
task_set(&s->s_deferrals_task, pfsync_deferrals_task, s);
TAILQ_INIT(&s->s_deferrals);
#if NKSTAT > 0
s->s_kstat = kstat_create(ifp->if_xname, 0, "pfsync-slice", i,
KSTAT_T_KV, 0);
kstat_set_mutex(s->s_kstat, &s->s_mtx);
s->s_kstat->ks_softc = s;
s->s_kstat->ks_datalen = sizeof(pfsync_kstat_tpl);
s->s_kstat->ks_copy = pfsync_kstat_copy;
kstat_install(s->s_kstat);
#endif
}
if_counters_alloc(ifp);
if_attach(ifp);
if_alloc_sadl(ifp);
#if NCARP > 0
if_addgroup(ifp, "carp");
#endif
#if NBPFILTER > 0
bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif
return (0);
}
static int
pfsync_clone_destroy(struct ifnet *ifp)
{
struct pfsync_softc *sc = ifp->if_softc;
#if NKSTAT > 0
size_t i;
#endif
NET_LOCK();
sc->sc_dead = 1;
if (ISSET(ifp->if_flags, IFF_RUNNING))
pfsync_down(sc);
NET_UNLOCK();
if_detach(ifp);
#if NKSTAT > 0
for (i = 0; i < nitems(sc->sc_slices); i++) {
struct pfsync_slice *s = &sc->sc_slices[i];
kstat_destroy(s->s_kstat);
}
#endif
free(sc, M_DEVBUF, sizeof(*sc));
return (0);
}
static void
pfsync_dprintf(struct pfsync_softc *sc, const char *fmt, ...)
{
struct ifnet *ifp = &sc->sc_if;
va_list ap;
if (!ISSET(ifp->if_flags, IFF_DEBUG))
return;
printf("%s: ", ifp->if_xname);
va_start(ap, fmt);
vprintf(fmt, ap);
va_end(ap);
printf("\n");
}
static void
pfsync_syncif_link(void *arg)
{
struct pfsync_softc *sc = arg;
struct ifnet *ifp0;
unsigned int sync_if_down = 1;
ifp0 = if_get(sc->sc_sync_ifidx);
if (ifp0 != NULL && LINK_STATE_IS_UP(ifp0->if_link_state)) {
pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_LINK);
sync_if_down = 0;
}
if_put(ifp0);
#if NCARP > 0
if (sc->sc_sync_if_down != sync_if_down) {
carp_group_demote_adj(&sc->sc_if,
sync_if_down ? 1 : -1, "pfsync link");
}
#endif
sc->sc_sync_if_down = sync_if_down;
}
static void
pfsync_syncif_detach(void *arg)
{
struct pfsync_softc *sc = arg;
struct ifnet *ifp = &sc->sc_if;
if (ISSET(ifp->if_flags, IFF_RUNNING)) {
pfsync_down(sc);
if_down(ifp);
}
sc->sc_sync_ifidx = 0;
}
static int
pfsync_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
struct rtentry *rt)
{
m_freem(m); /* drop packet */
return (EAFNOSUPPORT);
}
static int
pfsync_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct pfsync_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
int error = ENOTTY;
switch (cmd) {
case SIOCSIFADDR:
error = EOPNOTSUPP;
break;
case SIOCSIFFLAGS:
if (ISSET(ifp->if_flags, IFF_UP)) {
if (!ISSET(ifp->if_flags, IFF_RUNNING))
error = pfsync_up(sc);
else
error = ENETRESET;
} else {
if (ISSET(ifp->if_flags, IFF_RUNNING))
error = pfsync_down(sc);
}
break;
case SIOCSIFMTU:
error = pfsync_set_mtu(sc, ifr->ifr_mtu);
break;
case SIOCSIFPARENT:
error = pfsync_set_parent(sc, (struct if_parent *)data);
break;
case SIOCGIFPARENT:
error = pfsync_get_parent(sc, (struct if_parent *)data);
break;
case SIOCDIFPARENT:
error = pfsync_del_parent(sc);
break;
case SIOCSETPFSYNC:
error = pfsync_set_ioc(sc, ifr);
break;
case SIOCGETPFSYNC:
error = pfsync_get_ioc(sc, ifr);
break;
default:
break;
}
if (error == ENETRESET)
error = 0;
return (error);
}
static int
pfsync_set_mtu(struct pfsync_softc *sc, unsigned int mtu)
{
struct ifnet *ifp = &sc->sc_if;
struct ifnet *ifp0;
int error = 0;
ifp0 = if_get(sc->sc_sync_ifidx);
if (ifp0 == NULL)
return (EINVAL);
if (mtu <= PFSYNC_MINPKT || mtu > ifp0->if_mtu) {
error = EINVAL;
goto put;
}
/* commit */
ifp->if_mtu = mtu;
put:
if_put(ifp0);
return (error);
}
static int
pfsync_set_parent(struct pfsync_softc *sc, const struct if_parent *p)
{
struct ifnet *ifp = &sc->sc_if;
struct ifnet *ifp0;
int error = 0;
ifp0 = if_unit(p->ifp_parent);
if (ifp0 == NULL)
return (ENXIO);
if (ifp0->if_index == sc->sc_sync_ifidx)
goto put;
if (ISSET(ifp->if_flags, IFF_RUNNING)) {
error = EBUSY;
goto put;
}
/* commit */
sc->sc_sync_ifidx = ifp0->if_index;
put:
if_put(ifp0);
return (error);
}
static int
pfsync_get_parent(struct pfsync_softc *sc, struct if_parent *p)
{
struct ifnet *ifp0;
int error = 0;
ifp0 = if_get(sc->sc_sync_ifidx);
if (ifp0 == NULL)
error = EADDRNOTAVAIL;
else
strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
if_put(ifp0);
return (error);
}
static int
pfsync_del_parent(struct pfsync_softc *sc)
{
struct ifnet *ifp = &sc->sc_if;
if (ISSET(ifp->if_flags, IFF_RUNNING))
return (EBUSY);
/* commit */
sc->sc_sync_ifidx = 0;
return (0);
}
static int
pfsync_get_ioc(struct pfsync_softc *sc, struct ifreq *ifr)
{
struct pfsyncreq pfsyncr;
struct ifnet *ifp0;
memset(&pfsyncr, 0, sizeof(pfsyncr));
ifp0 = if_get(sc->sc_sync_ifidx);
if (ifp0 != NULL) {
strlcpy(pfsyncr.pfsyncr_syncdev, ifp0->if_xname,
sizeof(pfsyncr.pfsyncr_syncdev));
}
if_put(ifp0);
pfsyncr.pfsyncr_syncpeer = sc->sc_syncpeer;
pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
pfsyncr.pfsyncr_defer = sc->sc_defer;
return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
}
static int
pfsync_set_ioc(struct pfsync_softc *sc, struct ifreq *ifr)
{
struct ifnet *ifp = &sc->sc_if;
struct pfsyncreq pfsyncr;
unsigned int sync_ifidx = sc->sc_sync_ifidx;
int wantdown = 0;
int error;
error = suser(curproc);
if (error != 0)
return (error);
error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr));
if (error != 0)
return (error);
if (pfsyncr.pfsyncr_maxupdates > 255)
return (EINVAL);
if (pfsyncr.pfsyncr_syncdev[0] != '\0') { /* set */
struct ifnet *ifp0 = if_unit(pfsyncr.pfsyncr_syncdev);
if (ifp0 == NULL)
return (ENXIO);
if (ifp0->if_index != sync_ifidx)
wantdown = 1;
sync_ifidx = ifp0->if_index;
if_put(ifp0);
} else { /* del */
wantdown = 1;
sync_ifidx = 0;
}
if (pfsyncr.pfsyncr_syncpeer.s_addr == INADDR_ANY)
pfsyncr.pfsyncr_syncpeer.s_addr = INADDR_PFSYNC_GROUP;
if (pfsyncr.pfsyncr_syncpeer.s_addr != sc->sc_syncpeer.s_addr)
wantdown = 1;
if (wantdown && ISSET(ifp->if_flags, IFF_RUNNING))
return (EBUSY);
/* commit */
sc->sc_sync_ifidx = sync_ifidx;
sc->sc_syncpeer = pfsyncr.pfsyncr_syncpeer;
sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
sc->sc_defer = pfsyncr.pfsyncr_defer;
return (0);
}
static int
pfsync_up(struct pfsync_softc *sc)
{
struct ifnet *ifp = &sc->sc_if;
struct ifnet *ifp0;
void *inm = NULL;
int error = 0;
struct ip *ip;
NET_ASSERT_LOCKED();
KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
if (sc->sc_dead)
return (ENXIO);
/*
* coordinate with pfsync_down(). if sc_up is still up and
* we're here then something else is tearing pfsync down.
*/
if (sc->sc_up)
return (EBUSY);
if (sc->sc_syncpeer.s_addr == INADDR_ANY ||
sc->sc_syncpeer.s_addr == INADDR_BROADCAST)
return (EDESTADDRREQ);
ifp0 = if_get(sc->sc_sync_ifidx);
if (ifp0 == NULL)
return (ENXIO);
if (IN_MULTICAST(sc->sc_syncpeer.s_addr)) {
if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
error = ENODEV;
goto put;
}
inm = in_addmulti(&sc->sc_syncpeer, ifp0);
if (inm == NULL) {
error = ECONNABORTED;
goto put;
}
}
sc->sc_up = 1;
ip = &sc->sc_template;
memset(ip, 0, sizeof(*ip));
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
/* len and id are set later */
ip->ip_off = htons(IP_DF);
ip->ip_ttl = PFSYNC_DFLTTL;
ip->ip_p = IPPROTO_PFSYNC;
ip->ip_src.s_addr = INADDR_ANY;
ip->ip_dst.s_addr = sc->sc_syncpeer.s_addr;
/* commit */
refcnt_init(&sc->sc_refs); /* IFF_RUNNING kind of owns this */
#if NCARP > 0
sc->sc_sync_if_down = 1;
carp_group_demote_adj(&sc->sc_if, 1, "pfsync up");
#endif
if_linkstatehook_add(ifp0, &sc->sc_ltask);
if_detachhook_add(ifp0, &sc->sc_dtask);
sc->sc_inm = inm;
SET(ifp->if_flags, IFF_RUNNING);
pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_UP);
refcnt_take(&sc->sc_refs); /* give one to SMR */
SMR_PTR_SET_LOCKED(&pfsyncif, sc);
pfsync_syncif_link(sc); /* try and push the bulk req state forward */
put:
if_put(ifp0);
return (error);
}
static struct mbuf *
pfsync_encap(struct pfsync_softc *sc, struct mbuf *m)
{
struct {
struct ip ip;
struct pfsync_header ph;
} __packed __aligned(4) *h;
unsigned int mlen = m->m_pkthdr.len;
m = m_prepend(m, sizeof(*h), M_DONTWAIT);
if (m == NULL)
return (NULL);
h = mtod(m, void *);
memset(h, 0, sizeof(*h));
mlen += sizeof(h->ph);
h->ph.version = PFSYNC_VERSION;
h->ph.len = htons(mlen);
/* h->ph.pfcksum */
mlen += sizeof(h->ip);
h->ip = sc->sc_template;
h->ip.ip_len = htons(mlen);
h->ip.ip_id = htons(ip_randomid());
return (m);
}
static void
pfsync_bulk_req_send(struct pfsync_softc *sc)
{
struct {
struct pfsync_subheader subh;
struct pfsync_upd_req ur;
} __packed __aligned(4) *h;
unsigned mlen = max_linkhdr +
sizeof(struct ip) + sizeof(struct pfsync_header) + sizeof(*h);
struct mbuf *m;
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
goto fail;
if (mlen > MHLEN) {
MCLGETL(m, M_DONTWAIT, mlen);
if (!ISSET(m->m_flags, M_EXT))
goto drop;
}
m_align(m, sizeof(*h));
m->m_len = m->m_pkthdr.len = sizeof(*h);
h = mtod(m, void *);
memset(h, 0, sizeof(*h));
h->subh.action = PFSYNC_ACT_UPD_REQ;
h->subh.len = sizeof(h->ur) >> 2;
h->subh.count = htons(1);
h->ur.id = htobe64(0);
h->ur.creatorid = htobe32(0);
m = pfsync_encap(sc, m);
if (m == NULL)
goto fail;
pfsync_sendout(sc, m);
return;
drop:
m_freem(m);
fail:
printf("%s: unable to request bulk update\n", sc->sc_if.if_xname);
}
static void
pfsync_bulk_req_nstate(struct pfsync_softc *sc,
enum pfsync_bulk_req_state nstate, int seconds)
{
sc->sc_bulk_req.req_state = nstate;
if (seconds > 0)
timeout_add_sec(&sc->sc_bulk_req.req_tmo, seconds);
else
timeout_del(&sc->sc_bulk_req.req_tmo);
}
static void
pfsync_bulk_req_invstate(struct pfsync_softc *sc,
enum pfsync_bulk_req_event evt)
{
panic("%s: unexpected event %s in state %s", sc->sc_if.if_xname,
pfsync_bulk_req_event_names[evt],
pfsync_bulk_req_state_names[sc->sc_bulk_req.req_state]);
}
static void
pfsync_bulk_req_nstate_bulk(struct pfsync_softc *sc)
{
/* calculate the number of packets we expect */
int t = pf_pool_limits[PF_LIMIT_STATES].limit /
((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
sizeof(struct pfsync_state));
/* turn it into seconds */
t /= 1000 / PFSYNC_BULK_SND_IVAL_MS;
if (t == 0)
t = 1;
pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_BULK, t * 4);
}
static inline void
pfsync_bulk_req_nstate_done(struct pfsync_softc *sc)
{
pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_DONE, 0);
KASSERT(sc->sc_bulk_req.req_demoted == 1);
sc->sc_bulk_req.req_demoted = 0;
#if NCARP > 0
carp_group_demote_adj(&sc->sc_if, -32, "pfsync done");
#endif
}
static void
pfsync_bulk_req_evt(struct pfsync_softc *sc, enum pfsync_bulk_req_event evt)
{
struct ifnet *ifp = &sc->sc_if;
rw_enter_write(&sc->sc_bulk_req.req_lock);
pfsync_dprintf(sc, "%s state %s evt %s", __func__,
pfsync_bulk_req_state_names[sc->sc_bulk_req.req_state],
pfsync_bulk_req_event_names[evt]);
if (evt == PFSYNC_BREQ_EVT_DOWN) {
/* unconditionally move down */
sc->sc_bulk_req.req_tries = 0;
pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_NONE, 0);
if (sc->sc_bulk_req.req_demoted) {
sc->sc_bulk_req.req_demoted = 0;
#if NCARP > 0
carp_group_demote_adj(&sc->sc_if, -32,
"pfsync down");
#endif
}
} else switch (sc->sc_bulk_req.req_state) {
case PFSYNC_BREQ_S_NONE:
switch (evt) {
case PFSYNC_BREQ_EVT_UP:
KASSERT(sc->sc_bulk_req.req_demoted == 0);
sc->sc_bulk_req.req_demoted = 1;
#if NCARP > 0
carp_group_demote_adj(&sc->sc_if, 32,
"pfsync start");
#endif
pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_START, 30);
break;
default:
pfsync_bulk_req_invstate(sc, evt);
}
break;
case PFSYNC_BREQ_S_START:
switch (evt) {
case PFSYNC_BREQ_EVT_LINK:
pfsync_bulk_req_send(sc);
pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_SENT, 2);
break;
case PFSYNC_BREQ_EVT_TMO:
pfsync_dprintf(sc, "timeout waiting for link");
pfsync_bulk_req_nstate_done(sc);
break;
case PFSYNC_BREQ_EVT_BUS_START:
pfsync_bulk_req_nstate_bulk(sc);
break;
case PFSYNC_BREQ_EVT_BUS_END:
/* ignore this */
break;
default:
pfsync_bulk_req_invstate(sc, evt);
}
break;
case PFSYNC_BREQ_S_SENT:
switch (evt) {
case PFSYNC_BREQ_EVT_BUS_START:
pfsync_bulk_req_nstate_bulk(sc);
break;
case PFSYNC_BREQ_EVT_BUS_END:
case PFSYNC_BREQ_EVT_LINK:
/* ignore this */
break;
case PFSYNC_BREQ_EVT_TMO:
if (++sc->sc_bulk_req.req_tries <
PFSYNC_MAX_BULKTRIES) {
pfsync_bulk_req_send(sc);
pfsync_bulk_req_nstate(sc,
PFSYNC_BREQ_S_SENT, 2);
break;
}
pfsync_dprintf(sc,
"timeout waiting for bulk transfer start");
pfsync_bulk_req_nstate_done(sc);
break;
default:
pfsync_bulk_req_invstate(sc, evt);
}
break;
case PFSYNC_BREQ_S_BULK:
switch (evt) {
case PFSYNC_BREQ_EVT_BUS_START:
case PFSYNC_BREQ_EVT_LINK:
/* ignore this */
break;
case PFSYNC_BREQ_EVT_BUS_END:
pfsync_bulk_req_nstate_done(sc);
break;
case PFSYNC_BREQ_EVT_TMO:
if (++sc->sc_bulk_req.req_tries <
PFSYNC_MAX_BULKTRIES) {
pfsync_bulk_req_send(sc);
pfsync_bulk_req_nstate(sc,
PFSYNC_BREQ_S_SENT, 2);
}
pfsync_dprintf(sc,
"timeout waiting for bulk transfer end");
pfsync_bulk_req_nstate_done(sc);
break;
default:
pfsync_bulk_req_invstate(sc, evt);
}
break;
case PFSYNC_BREQ_S_DONE: /* pfsync is up and running */
switch (evt) {
case PFSYNC_BREQ_EVT_BUS_START:
case PFSYNC_BREQ_EVT_BUS_END:
case PFSYNC_BREQ_EVT_LINK:
/* nops */
break;
default:
pfsync_bulk_req_invstate(sc, evt);
}
break;
default:
panic("%s: unknown event %d", ifp->if_xname, evt);
/* NOTREACHED */
}
rw_exit_write(&sc->sc_bulk_req.req_lock);
}
static void
pfsync_bulk_req_tmo(void *arg)
{
struct pfsync_softc *sc = arg;
NET_LOCK();
pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_TMO);
NET_UNLOCK();
}
static int
pfsync_down(struct pfsync_softc *sc)
{
struct ifnet *ifp = &sc->sc_if;
struct ifnet *ifp0;
struct smr_entry smr;
size_t i;
void *inm = NULL;
unsigned int sndbar = 0;
struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
struct pfsync_deferral *pd;
NET_ASSERT_LOCKED();
KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
/*
* tearing down pfsync involves waiting for pfsync to stop
* running in various contexts including softnet taskqs.
* this thread cannot hold netlock while waiting for a
* barrier in softnet because softnet might be waiting for
* the netlock. sc->sc_up is used to coordinate with
* pfsync_up.
*/
CLR(ifp->if_flags, IFF_RUNNING);
ifp0 = if_get(sc->sc_sync_ifidx);
if (ifp0 != NULL) {
if_linkstatehook_del(ifp0, &sc->sc_ltask);
if_detachhook_del(ifp0, &sc->sc_dtask);
}
if_put(ifp0);
#if NCARP > 0
if (sc->sc_sync_if_down)
carp_group_demote_adj(&sc->sc_if, -1, "pfsync down");
#endif
NET_UNLOCK();
KASSERTMSG(SMR_PTR_GET_LOCKED(&pfsyncif) == sc,
"pfsyncif %p != sc %p", pfsyncif, sc);
SMR_PTR_SET_LOCKED(&pfsyncif, NULL);
smr_init(&smr);
smr_call(&smr, (void (*)(void *))refcnt_rele_wake, &sc->sc_refs);
/* stop pf producing work before cleaning up the timeouts and tasks */
refcnt_finalize(&sc->sc_refs, "pfsyncfini");
pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_DOWN);
rw_enter_read(&pf_state_list.pfs_rwl);
rw_enter_write(&sc->sc_bulk_snd.snd_lock);
if (sc->sc_bulk_snd.snd_tail != NULL) {
sndbar = !timeout_del(&sc->sc_bulk_snd.snd_tmo);
sc->sc_bulk_snd.snd_again = 0;
sc->sc_bulk_snd.snd_next = NULL;
sc->sc_bulk_snd.snd_tail = NULL;
}
rw_exit_write(&sc->sc_bulk_snd.snd_lock);
rw_exit_read(&pf_state_list.pfs_rwl);
/*
* do a single barrier for all the timeouts. because the
* timeouts in each slice are configured the same way, the
* barrier for one will work for all of them.
*/
for (i = 0; i < nitems(sc->sc_slices); i++) {
struct pfsync_slice *s = &sc->sc_slices[i];
timeout_del(&s->s_tmo);
task_del(s->s_softnet, &s->s_task);
task_del(s->s_softnet, &s->s_send);
timeout_del(&s->s_deferrals_tmo);
task_del(s->s_softnet, &s->s_deferrals_task);
}
timeout_barrier(&sc->sc_slices[0].s_tmo);
timeout_barrier(&sc->sc_bulk_req.req_tmo); /* XXX proc */
if (sndbar) {
/* technically the preceding barrier does the same job */
timeout_barrier(&sc->sc_bulk_snd.snd_tmo);
}
net_tq_barriers("pfsyncbar");
/* pfsync is no longer running */
if (sc->sc_inm != NULL) {
inm = sc->sc_inm;
sc->sc_inm = NULL;
}
for (i = 0; i < nitems(sc->sc_slices); i++) {
struct pfsync_slice *s = &sc->sc_slices[i];
struct pf_state *st;
pfsync_slice_drop(sc, s);
mq_purge(&s->s_sendq);
while ((pd = TAILQ_FIRST(&s->s_deferrals)) != NULL) {
TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
st = pd->pd_st;
st->sync_defer = NULL;
TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
}
s->s_deferred = 0;
}
NET_LOCK();
sc->sc_up = 0;
if (inm != NULL)
in_delmulti(inm);
while ((pd = TAILQ_FIRST(&pds)) != NULL) {
TAILQ_REMOVE(&pds, pd, pd_entry);
pfsync_defer_output(pd);
}
return (0);
}
int
pfsync_is_up(void)
{
int rv;
smr_read_enter();
rv = SMR_PTR_GET(&pfsyncif) != NULL;
smr_read_leave();
return (rv);
}
static void
pfsync_start(struct ifqueue *ifq)
{
ifq_purge(ifq);
}
struct pfsync_q {
void (*write)(struct pf_state *, void *);
size_t len;
u_int8_t action;
};
static struct pfsync_slice *
pfsync_slice_enter(struct pfsync_softc *sc, const struct pf_state *st)
{
unsigned int idx = st->key[0]->hash % nitems(sc->sc_slices);
struct pfsync_slice *s = &sc->sc_slices[idx];
if (!mtx_enter_try(&s->s_mtx)) {
mtx_enter(&s->s_mtx);
s->s_stat_contended++;
}
s->s_stat_locks++;
return (s);
}
static void
pfsync_slice_leave(struct pfsync_softc *sc, struct pfsync_slice *s)
{
mtx_leave(&s->s_mtx);
}
/* we have one of these for every PFSYNC_S_ */
static void pfsync_out_state(struct pf_state *, void *);
static void pfsync_out_iack(struct pf_state *, void *);
static void pfsync_out_upd_c(struct pf_state *, void *);
static void pfsync_out_del(struct pf_state *, void *);
#if defined(IPSEC)
static void pfsync_out_tdb(struct tdb *, void *);
#endif
static const struct pfsync_q pfsync_qs[] = {
{ pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
{ pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C },
{ pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
{ pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }
};
static void
pfsync_out_state(struct pf_state *st, void *buf)
{
struct pfsync_state *sp = buf;
mtx_enter(&st->mtx);
pf_state_export(sp, st);
mtx_leave(&st->mtx);
}
static void
pfsync_out_iack(struct pf_state *st, void *buf)
{
struct pfsync_ins_ack *iack = buf;
iack->id = st->id;
iack->creatorid = st->creatorid;
}
static void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
struct pfsync_upd_c *up = buf;
memset(up, 0, sizeof(*up));
up->id = st->id;
up->creatorid = st->creatorid;
mtx_enter(&st->mtx);
pf_state_peer_hton(&st->src, &up->src);
pf_state_peer_hton(&st->dst, &up->dst);
up->timeout = st->timeout;
mtx_leave(&st->mtx);
}
static void
pfsync_out_del(struct pf_state *st, void *buf)
{
struct pfsync_del_c *dp = buf;
dp->id = st->id;
dp->creatorid = st->creatorid;
st->sync_state = PFSYNC_S_DEAD;
}
#if defined(IPSEC)
static inline void
pfsync_tdb_enter(struct tdb *tdb)
{
mtx_enter(&tdb->tdb_mtx);
}
static inline void
pfsync_tdb_leave(struct tdb *tdb)
{
unsigned int snapped = ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
mtx_leave(&tdb->tdb_mtx);
if (snapped)
wakeup_one(&tdb->tdb_updates);
}
#endif /* defined(IPSEC) */
static void
pfsync_slice_drop(struct pfsync_softc *sc, struct pfsync_slice *s)
{
struct pf_state *st;
int q;
#if defined(IPSEC)
struct tdb *tdb;
#endif
for (q = 0; q < nitems(s->s_qs); q++) {
if (TAILQ_EMPTY(&s->s_qs[q]))
continue;
while ((st = TAILQ_FIRST(&s->s_qs[q])) != NULL) {
TAILQ_REMOVE(&s->s_qs[q], st, sync_list);
#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
#endif
st->sync_state = PFSYNC_S_NONE;
pf_state_unref(st);
}
}
#if defined(IPSEC)
while ((tdb = TAILQ_FIRST(&s->s_tdb_q)) != NULL) {
TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
pfsync_tdb_enter(tdb);
KASSERT(ISSET(tdb->tdb_flags, TDBF_PFSYNC));
CLR(tdb->tdb_flags, TDBF_PFSYNC);
pfsync_tdb_leave(tdb);
}
#endif /* defined(IPSEC) */
timeout_del(&s->s_tmo);
s->s_len = PFSYNC_MINPKT;
}
static struct mbuf *
pfsync_slice_write(struct pfsync_slice *s)
{
struct pfsync_softc *sc = s->s_pfsync;
struct mbuf *m;
struct ip *ip;
struct pfsync_header *ph;
struct pfsync_subheader *subh;
unsigned int mlen = max_linkhdr + s->s_len;
unsigned int q, count;
caddr_t ptr;
size_t off;
MUTEX_ASSERT_LOCKED(&s->s_mtx);
if (s->s_len == PFSYNC_MINPKT) {
s->s_stat_write_nop++;
return (NULL);
}
task_del(s->s_softnet, &s->s_task);
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
goto drop;
if (mlen > MHLEN) {
MCLGETL(m, M_DONTWAIT, mlen);
if (!ISSET(m->m_flags, M_EXT))
goto drop;
}
m_align(m, s->s_len);
m->m_len = m->m_pkthdr.len = s->s_len;
ptr = mtod(m, caddr_t);
off = 0;
ip = (struct ip *)(ptr + off);
off += sizeof(*ip);
*ip = sc->sc_template;
ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_id = htons(ip_randomid());
ph = (struct pfsync_header *)(ptr + off);
off += sizeof(*ph);
memset(ph, 0, sizeof(*ph));
ph->version = PFSYNC_VERSION;
ph->len = htons(m->m_pkthdr.len - sizeof(*ip));
for (q = 0; q < nitems(s->s_qs); q++) {
struct pf_state_queue *psq = &s->s_qs[q];
struct pf_state *st;
if (TAILQ_EMPTY(psq))
continue;
subh = (struct pfsync_subheader *)(ptr + off);
off += sizeof(*subh);
count = 0;
while ((st = TAILQ_FIRST(psq)) != NULL) {
TAILQ_REMOVE(psq, st, sync_list);
count++;
KASSERT(st->sync_state == q);
/* the write handler below may override this */
st->sync_state = PFSYNC_S_NONE;
pfsync_qs[q].write(st, ptr + off);
off += pfsync_qs[q].len;
pf_state_unref(st);
}
subh->action = pfsync_qs[q].action;
subh->len = pfsync_qs[q].len >> 2;
subh->count = htons(count);
}
#if defined(IPSEC)
if (!TAILQ_EMPTY(&s->s_tdb_q)) {
struct tdb *tdb;
subh = (struct pfsync_subheader *)(ptr + off);
off += sizeof(*subh);
count = 0;
while ((tdb = TAILQ_FIRST(&s->s_tdb_q)) != NULL) {
TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
count++;
pfsync_tdb_enter(tdb);
KASSERT(ISSET(tdb->tdb_flags, TDBF_PFSYNC));
/* get a consistent view of the counters */
pfsync_out_tdb(tdb, ptr + off);
CLR(tdb->tdb_flags, TDBF_PFSYNC);
pfsync_tdb_leave(tdb);
off += sizeof(struct pfsync_tdb);
}
subh->action = PFSYNC_ACT_TDB;
subh->len = sizeof(struct pfsync_tdb) >> 2;
subh->count = htons(count);
}
#endif
timeout_del(&s->s_tmo);
s->s_len = PFSYNC_MINPKT;
return (m);
drop:
m_freem(m);
pfsyncstat_inc(pfsyncs_onomem);
pfsync_slice_drop(sc, s);
return (NULL);
}
static void
pfsync_sendout(struct pfsync_softc *sc, struct mbuf *m)
{
struct ip_moptions imo;
unsigned int len = m->m_pkthdr.len;
#if NBPFILTER > 0
caddr_t if_bpf = sc->sc_if.if_bpf;
if (if_bpf)
bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT);
#endif
imo.imo_ifidx = sc->sc_sync_ifidx;
imo.imo_ttl = PFSYNC_DFLTTL;
imo.imo_loop = 0;
m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) == 0) {
counters_pkt(sc->sc_if.if_counters, ifc_opackets,
ifc_obytes, len);
pfsyncstat_inc(pfsyncs_opackets);
} else {
counters_inc(sc->sc_if.if_counters, ifc_oerrors);
pfsyncstat_inc(pfsyncs_oerrors);
}
}
static void
pfsync_slice_tmo(void *arg)
{
struct pfsync_slice *s = arg;
task_add(s->s_softnet, &s->s_task);
}
static void
pfsync_slice_sched(struct pfsync_slice *s)
{
s->s_stat_task_add++;
task_add(s->s_softnet, &s->s_task);
}
static void
pfsync_slice_task(void *arg)
{
struct pfsync_slice *s = arg;
struct mbuf *m;
mtx_enter(&s->s_mtx);
s->s_stat_task_run++;
m = pfsync_slice_write(s);
mtx_leave(&s->s_mtx);
if (m != NULL) {
NET_LOCK();
pfsync_sendout(s->s_pfsync, m);
NET_UNLOCK();
}
}
static void
pfsync_slice_sendq(void *arg)
{
struct pfsync_slice *s = arg;
struct mbuf_list ml;
struct mbuf *m;
mq_delist(&s->s_sendq, &ml);
if (ml_empty(&ml))
return;
mtx_enter(&s->s_mtx);
s->s_stat_dequeue++;
mtx_leave(&s->s_mtx);
NET_LOCK();
while ((m = ml_dequeue(&ml)) != NULL)
pfsync_sendout(s->s_pfsync, m);
NET_UNLOCK();
}
static void
pfsync_q_ins(struct pfsync_slice *s, struct pf_state *st, unsigned int q)
{
size_t nlen = pfsync_qs[q].len;
struct mbuf *m = NULL;
MUTEX_ASSERT_LOCKED(&s->s_mtx);
KASSERT(st->sync_state == PFSYNC_S_NONE);
KASSERT(s->s_len >= PFSYNC_MINPKT);
if (TAILQ_EMPTY(&s->s_qs[q]))
nlen += sizeof(struct pfsync_subheader);
if (s->s_len + nlen > s->s_pfsync->sc_if.if_mtu) {
m = pfsync_slice_write(s);
if (m != NULL) {
s->s_stat_enqueue++;
if (mq_enqueue(&s->s_sendq, m) == 0)
task_add(s->s_softnet, &s->s_send);
}
nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
}
s->s_len += nlen;
pf_state_ref(st);
TAILQ_INSERT_TAIL(&s->s_qs[q], st, sync_list);
st->sync_state = q;
if (!timeout_pending(&s->s_tmo))
timeout_add_sec(&s->s_tmo, 1);
}
static void
pfsync_q_del(struct pfsync_slice *s, struct pf_state *st)
{
unsigned int q = st->sync_state;
MUTEX_ASSERT_LOCKED(&s->s_mtx);
KASSERT(st->sync_state < PFSYNC_S_NONE);
st->sync_state = PFSYNC_S_NONE;
TAILQ_REMOVE(&s->s_qs[q], st, sync_list);
pf_state_unref(st);
s->s_len -= pfsync_qs[q].len;
if (TAILQ_EMPTY(&s->s_qs[q]))
s->s_len -= sizeof(struct pfsync_subheader);
}
/*
* the pfsync hooks that pf calls
*/
void
pfsync_init_state(struct pf_state *st, const struct pf_state_key *skw,
const struct pf_state_key *sks, int flags)
{
/* this is called before pf_state_insert */
if (skw->proto == IPPROTO_PFSYNC)
SET(st->state_flags, PFSTATE_NOSYNC);
if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
st->sync_state = PFSYNC_S_DEAD;
return;
}
if (ISSET(flags, PFSYNC_SI_IOCTL)) {
/* all good */
return;
}
/* state came off the wire */
if (ISSET(flags, PFSYNC_SI_PFSYNC)) {
if (ISSET(st->state_flags, PFSTATE_ACK)) {
CLR(st->state_flags, PFSTATE_ACK);
/* peer wants an iack, not an insert */
st->sync_state = PFSYNC_S_SYNC;
} else
st->sync_state = PFSYNC_S_PFSYNC;
}
}
void
pfsync_insert_state(struct pf_state *st)
{
struct pfsync_softc *sc;
MUTEX_ASSERT_UNLOCKED(&st->mtx);
if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
st->sync_state == PFSYNC_S_DEAD)
return;
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL) {
struct pfsync_slice *s = pfsync_slice_enter(sc, st);
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
/* we must have lost a race after insert */
pfsync_q_del(s, st);
/* FALLTHROUGH */
case PFSYNC_S_NONE:
pfsync_q_ins(s, st, PFSYNC_S_INS);
break;
case PFSYNC_S_SYNC:
st->sync_state = PFSYNC_S_NONE; /* gross */
pfsync_q_ins(s, st, PFSYNC_S_IACK);
pfsync_slice_sched(s); /* the peer is waiting */
break;
case PFSYNC_S_PFSYNC:
/* state was just inserted by pfsync */
st->sync_state = PFSYNC_S_NONE;
break;
default:
panic("%s: state %p unexpected sync_state %d",
__func__, st, st->sync_state);
/* NOTREACHED */
}
pfsync_slice_leave(sc, s);
}
smr_read_leave();
}
void
pfsync_update_state(struct pf_state *st)
{
struct pfsync_softc *sc;
MUTEX_ASSERT_UNLOCKED(&st->mtx);
if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
st->sync_state == PFSYNC_S_DEAD)
return;
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL) {
struct pfsync_slice *s = pfsync_slice_enter(sc, st);
int sync = 0;
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
case PFSYNC_S_UPD:
/* we're already handling it */
if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
st->sync_updates++;
if (st->sync_updates >= sc->sc_maxupdates)
sync = 1;
}
/* FALLTHROUGH */
case PFSYNC_S_INS:
case PFSYNC_S_DEL:
case PFSYNC_S_DEAD:
break;
case PFSYNC_S_IACK:
pfsync_q_del(s, st);
/* FALLTHROUGH */
case PFSYNC_S_NONE:
pfsync_q_ins(s, st, PFSYNC_S_UPD_C);
st->sync_updates = 0;
break;
default:
panic("%s: state %p unexpected sync_state %d",
__func__, st, st->sync_state);
/* NOTREACHED */
}
if (!sync && (getuptime() - st->pfsync_time) < 2)
sync = 1;
if (sync)
pfsync_slice_sched(s);
pfsync_slice_leave(sc, s);
}
smr_read_leave();
}
void
pfsync_delete_state(struct pf_state *st)
{
struct pfsync_softc *sc;
MUTEX_ASSERT_UNLOCKED(&st->mtx);
if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
st->sync_state == PFSYNC_S_DEAD)
return;
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL) {
struct pfsync_slice *s = pfsync_slice_enter(sc, st);
switch (st->sync_state) {
case PFSYNC_S_INS:
/* let's pretend this never happened */
pfsync_q_del(s, st);
break;
case PFSYNC_S_UPD_C:
case PFSYNC_S_UPD:
case PFSYNC_S_IACK:
pfsync_q_del(s, st);
/* FALLTHROUGH */
case PFSYNC_S_NONE:
pfsync_q_ins(s, st, PFSYNC_S_DEL);
st->sync_updates = 0;
break;
case PFSYNC_S_DEL:
case PFSYNC_S_DEAD:
/* XXX we should count this */
break;
default:
panic("%s: state %p unexpected sync_state %d",
__func__, st, st->sync_state);
/* NOTREACHED */
}
pfsync_slice_leave(sc, s);
}
smr_read_leave();
}
struct pfsync_subh_clr {
struct pfsync_subheader subh;
struct pfsync_clr clr;
} __packed __aligned(4);
void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
struct pfsync_softc *sc;
struct pfsync_subh_clr *h;
struct mbuf *m;
unsigned int hlen, mlen;
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL)
refcnt_take(&sc->sc_refs);
smr_read_leave();
if (sc == NULL)
return;
hlen = sizeof(sc->sc_template) +
sizeof(struct pfsync_header) +
sizeof(*h);
mlen = max_linkhdr + hlen;
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL) {
/* count error */
goto leave;
}
if (mlen > MHLEN) {
MCLGETL(m, M_DONTWAIT, mlen);
if (!ISSET(m->m_flags, M_EXT)) {
m_freem(m);
goto leave;
}
}
m_align(m, sizeof(*h));
h = mtod(m, struct pfsync_subh_clr *);
h->subh.action = PFSYNC_ACT_CLR;
h->subh.len = sizeof(h->clr) >> 2;
h->subh.count = htons(1);
strlcpy(h->clr.ifname, ifname, sizeof(h->clr.ifname));
h->clr.creatorid = creatorid;
m->m_pkthdr.len = m->m_len = sizeof(*h);
m = pfsync_encap(sc, m);
if (m == NULL)
goto leave;
pfsync_sendout(sc, m);
leave:
refcnt_rele_wake(&sc->sc_refs);
}
int
pfsync_state_in_use(struct pf_state *st)
{
struct pfsync_softc *sc;
int rv = 0;
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL) {
/*
* pfsync bulk sends run inside
* rw_enter_read(&pf_state_list.pfs_rwl), and this
* code (pfsync_state_in_use) is only called from the
* purge code inside
* rw_enter_write(&pf_state_list.pfs_rwl). therefore,
* those two sections are exclusive so we can safely
* look at the bulk send pointers.
*/
/* rw_assert_wrlock(&pf_state_list.pfs_rwl); */
if (sc->sc_bulk_snd.snd_next == st ||
sc->sc_bulk_snd.snd_tail == st)
rv = 1;
}
smr_read_leave();
return (rv);
}
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
struct pfsync_softc *sc;
struct pfsync_slice *s;
struct pfsync_deferral *pd;
int sched = 0;
int rv = 0;
if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
ISSET(m->m_flags, M_BCAST|M_MCAST))
return (0);
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc == NULL || !sc->sc_defer)
goto leave;
pd = pool_get(&pfsync_deferrals_pool, M_NOWAIT);
if (pd == NULL) {
goto leave;
}
s = pfsync_slice_enter(sc, st);
s->s_stat_defer_add++;
pd->pd_st = pf_state_ref(st);
pd->pd_m = m;
pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC;
m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
st->sync_defer = pd;
sched = s->s_deferred++;
TAILQ_INSERT_TAIL(&s->s_deferrals, pd, pd_entry);
if (sched == 0)
timeout_add_nsec(&s->s_deferrals_tmo, PFSYNC_DEFER_NSEC);
else if (sched >= PFSYNC_DEFER_LIMIT) {
s->s_stat_defer_overlimit++;
timeout_del(&s->s_deferrals_tmo);
task_add(s->s_softnet, &s->s_deferrals_task);
}
pfsync_slice_sched(s);
pfsync_slice_leave(sc, s);
rv = 1;
leave:
smr_read_leave();
return (rv);
}
static void
pfsync_deferred(struct pfsync_softc *sc, struct pf_state *st)
{
struct pfsync_slice *s;
struct pfsync_deferral *pd;
s = pfsync_slice_enter(sc, st);
pd = st->sync_defer;
if (pd != NULL) {
s->s_stat_defer_ack++;
TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
s->s_deferred--;
st = pd->pd_st;
st->sync_defer = NULL;
}
pfsync_slice_leave(sc, s);
if (pd != NULL)
pfsync_defer_output(pd);
}
static void
pfsync_deferrals_tmo(void *arg)
{
struct pfsync_slice *s = arg;
if (READ_ONCE(s->s_deferred) > 0)
task_add(s->s_softnet, &s->s_deferrals_task);
}
static void
pfsync_deferrals_task(void *arg)
{
struct pfsync_slice *s = arg;
struct pfsync_deferral *pd;
struct pf_state *st;
uint64_t now, nsec = 0;
struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
now = getnsecuptime();
mtx_enter(&s->s_mtx);
s->s_stat_defer_run++; /* maybe move this into the loop */
for (;;) {
pd = TAILQ_FIRST(&s->s_deferrals);
if (pd == NULL)
break;
if (s->s_deferred < PFSYNC_DEFER_LIMIT &&
now < pd->pd_deadline) {
nsec = pd->pd_deadline - now;
break;
}
TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
s->s_deferred--;
/*
* detach the pd from the state. the pd still refers
* to the state though.
*/
st = pd->pd_st;
st->sync_defer = NULL;
TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
}
mtx_leave(&s->s_mtx);
if (nsec > 0) {
/* we were looking at a pd, but it wasn't old enough */
timeout_add_nsec(&s->s_deferrals_tmo, nsec);
}
if (TAILQ_EMPTY(&pds))
return;
NET_LOCK();
while ((pd = TAILQ_FIRST(&pds)) != NULL) {
TAILQ_REMOVE(&pds, pd, pd_entry);
pfsync_defer_output(pd);
}
NET_UNLOCK();
}
static void
pfsync_defer_output(struct pfsync_deferral *pd)
{
struct pf_pdesc pdesc;
struct pf_state *st = pd->pd_st;
if (st->rt == PF_ROUTETO) {
if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
st->direction, NULL, pd->pd_m, NULL) != PF_PASS)
return;
switch (st->key[PF_SK_WIRE]->af) {
case AF_INET:
pf_route(&pdesc, st);
break;
#ifdef INET6
case AF_INET6:
pf_route6(&pdesc, st);
break;
#endif /* INET6 */
default:
unhandled_af(st->key[PF_SK_WIRE]->af);
}
pd->pd_m = pdesc.m;
} else {
switch (st->key[PF_SK_WIRE]->af) {
case AF_INET:
ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
break;
#ifdef INET6
case AF_INET6:
ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
break;
#endif /* INET6 */
default:
unhandled_af(st->key[PF_SK_WIRE]->af);
}
pd->pd_m = NULL;
}
pf_state_unref(st);
m_freem(pd->pd_m);
pool_put(&pfsync_deferrals_pool, pd);
}
struct pfsync_subh_bus {
struct pfsync_subheader subh;
struct pfsync_bus bus;
} __packed __aligned(4);
static unsigned int
pfsync_bulk_snd_bus(struct pfsync_softc *sc,
struct mbuf *m, const unsigned int space,
uint32_t endtime, uint8_t status)
{
struct pfsync_subh_bus *h;
unsigned int nlen;
nlen = m->m_len + sizeof(*h);
if (space < nlen)
return (0);
h = (struct pfsync_subh_bus *)(mtod(m, caddr_t) + m->m_len);
memset(h, 0, sizeof(*h));
h->subh.action = PFSYNC_ACT_BUS;
h->subh.len = sizeof(h->bus) >> 2;
h->subh.count = htons(1);
h->bus.creatorid = pf_status.hostid;
h->bus.endtime = htonl(endtime);
h->bus.status = status;
m->m_len = nlen;
return (1);
}
static unsigned int
pfsync_bulk_snd_states(struct pfsync_softc *sc,
struct mbuf *m, const unsigned int space, unsigned int len)
{
struct pf_state *st;
struct pfsync_state *sp;
unsigned int nlen;
unsigned int count = 0;
st = sc->sc_bulk_snd.snd_next;
for (;;) {
nlen = len + sizeof(*sp);
sp = (struct pfsync_state *)(mtod(m, caddr_t) + len);
if (space < nlen)
break;
mtx_enter(&st->mtx);
pf_state_export(sp, st);
mtx_leave(&st->mtx);
/* commit */
count++;
m->m_len = len = nlen;
if (st == sc->sc_bulk_snd.snd_tail) {
if (pfsync_bulk_snd_bus(sc, m, space,
0, PFSYNC_BUS_END) == 0) {
/* couldn't fit the BUS */
st = NULL;
break;
}
/* this BUS is done */
pfsync_dprintf(sc, "bulk send done (%s)", __func__);
sc->sc_bulk_snd.snd_again = 0; /* XXX */
sc->sc_bulk_snd.snd_next = NULL;
sc->sc_bulk_snd.snd_tail = NULL;
return (count);
}
st = TAILQ_NEXT(st, entry_list);
}
/* there's still work to do */
sc->sc_bulk_snd.snd_next = st;
timeout_add_msec(&sc->sc_bulk_snd.snd_tmo, PFSYNC_BULK_SND_IVAL_MS);
return (count);
}
static unsigned int
pfsync_bulk_snd_sub(struct pfsync_softc *sc,
struct mbuf *m, const unsigned int space)
{
struct pfsync_subheader *subh;
unsigned int count;
unsigned int len, nlen;
len = m->m_len;
nlen = len + sizeof(*subh);
if (nlen > space)
return (0);
subh = (struct pfsync_subheader *)(mtod(m, caddr_t) + len);
/*
* pfsync_bulk_snd_states only updates m->m_len after
* filling in a state after the offset we gave it.
*/
count = pfsync_bulk_snd_states(sc, m, space, nlen);
if (count == 0)
return (0);
subh->action = PFSYNC_ACT_UPD;
subh->len = sizeof(struct pfsync_state) >> 2;
subh->count = htons(count);
return (count);
}
static void
pfsync_bulk_snd_start(struct pfsync_softc *sc)
{
const unsigned int space = sc->sc_if.if_mtu -
(sizeof(struct ip) + sizeof(struct pfsync_header));
struct mbuf *m;
rw_enter_read(&pf_state_list.pfs_rwl);
rw_enter_write(&sc->sc_bulk_snd.snd_lock);
if (sc->sc_bulk_snd.snd_next != NULL) {
sc->sc_bulk_snd.snd_again = 1;
goto leave;
}
mtx_enter(&pf_state_list.pfs_mtx);
sc->sc_bulk_snd.snd_next = TAILQ_FIRST(&pf_state_list.pfs_list);
sc->sc_bulk_snd.snd_tail = TAILQ_LAST(&pf_state_list.pfs_list,
pf_state_queue);
mtx_leave(&pf_state_list.pfs_mtx);
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
goto leave;
MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_if.if_mtu);
if (!ISSET(m->m_flags, M_EXT)) {
/* some error++ */
m_freem(m); /* drop */
goto leave;
}
m_align(m, space);
m->m_len = 0;
if (sc->sc_bulk_snd.snd_tail == NULL) {
pfsync_dprintf(sc, "bulk send empty (%s)", __func__);
/* list is empty */
if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_END) == 0)
panic("%s: mtu is too low", __func__);
goto encap;
}
pfsync_dprintf(sc, "bulk send start (%s)", __func__);
/* start a bulk update. */
if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_START) == 0)
panic("%s: mtu is too low", __func__);
/* fill it up with state updates. */
pfsync_bulk_snd_sub(sc, m, space);
encap:
m->m_pkthdr.len = m->m_len;
m = pfsync_encap(sc, m);
if (m == NULL)
goto leave;
pfsync_sendout(sc, m);
leave:
rw_exit_write(&sc->sc_bulk_snd.snd_lock);
rw_exit_read(&pf_state_list.pfs_rwl);
}
static void
pfsync_bulk_snd_tmo(void *arg)
{
struct pfsync_softc *sc = arg;
const unsigned int space = sc->sc_if.if_mtu -
(sizeof(struct ip) + sizeof(struct pfsync_header));
struct mbuf *m;
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL) {
/* some error++ */
/* retry later */
timeout_add_msec(&sc->sc_bulk_snd.snd_tmo,
PFSYNC_BULK_SND_IVAL_MS);
return;
}
MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_if.if_mtu);
if (!ISSET(m->m_flags, M_EXT)) {
/* some error++ */
m_freem(m);
/* retry later */
timeout_add_msec(&sc->sc_bulk_snd.snd_tmo,
PFSYNC_BULK_SND_IVAL_MS);
return;
}
m_align(m, space);
m->m_len = 0;
rw_enter_read(&pf_state_list.pfs_rwl);
rw_enter_write(&sc->sc_bulk_snd.snd_lock);
if (sc->sc_bulk_snd.snd_next == NULL) {
/* there was no space in the previous packet for a BUS END */
if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_END) == 0)
panic("%s: mtu is too low", __func__);
/* this bulk is done */
pfsync_dprintf(sc, "bulk send done (%s)", __func__);
sc->sc_bulk_snd.snd_again = 0; /* XXX */
sc->sc_bulk_snd.snd_tail = NULL;
} else {
pfsync_dprintf(sc, "bulk send again (%s)", __func__);
/* fill it up with state updates. */
pfsync_bulk_snd_sub(sc, m, space);
}
m->m_pkthdr.len = m->m_len;
m = pfsync_encap(sc, m);
rw_exit_write(&sc->sc_bulk_snd.snd_lock);
rw_exit_read(&pf_state_list.pfs_rwl);
if (m != NULL) {
NET_LOCK();
pfsync_sendout(sc, m);
NET_UNLOCK();
}
}
static void
pfsync_update_state_req(struct pfsync_softc *sc, struct pf_state *st)
{
struct pfsync_slice *s = pfsync_slice_enter(sc, st);
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
case PFSYNC_S_IACK:
pfsync_q_del(s, st);
/* FALLTHROUGH */
case PFSYNC_S_NONE:
pfsync_q_ins(s, st, PFSYNC_S_UPD);
break;
case PFSYNC_S_INS:
case PFSYNC_S_UPD:
case PFSYNC_S_DEL:
/* we're already handling it */
break;
default:
panic("%s: state %p unexpected sync_state %d",
__func__, st, st->sync_state);
}
pfsync_slice_sched(s);
pfsync_slice_leave(sc, s);
}
#if defined(IPSEC)
static void
pfsync_out_tdb(struct tdb *tdb, void *buf)
{
struct pfsync_tdb *ut = buf;
memset(ut, 0, sizeof(*ut));
ut->spi = tdb->tdb_spi;
memcpy(&ut->dst, &tdb->tdb_dst, sizeof(ut->dst));
/*
* When a failover happens, the master's rpl is probably above
* what we see here (we may be up to a second late), so
* increase it a bit for outbound tdbs to manage most such
* situations.
*
* For now, just add an offset that is likely to be larger
* than the number of packets we can see in one second. The RFC
* just says the next packet must have a higher seq value.
*
* XXX What is a good algorithm for this? We could use
* a rate-determined increase, but to know it, we would have
* to extend struct tdb.
* XXX pt->rpl can wrap over MAXINT, but if so the real tdb
* will soon be replaced anyway. For now, just don't handle
* this edge case.
*/
#define RPL_INCR 16384
ut->rpl = htobe64(tdb->tdb_rpl +
(ISSET(tdb->tdb_flags, TDBF_PFSYNC_RPL) ? RPL_INCR : 0));
ut->cur_bytes = htobe64(tdb->tdb_cur_bytes);
ut->sproto = tdb->tdb_sproto;
ut->rdomain = htons(tdb->tdb_rdomain);
}
static struct pfsync_slice *
pfsync_slice_enter_tdb(struct pfsync_softc *sc, const struct tdb *t)
{
/*
* just use the first slice for all ipsec (for now) until
* it's more obvious what property (eg, spi) we can distribute
* tdbs over slices with.
*/
struct pfsync_slice *s = &sc->sc_slices[0];
if (!mtx_enter_try(&s->s_mtx)) {
mtx_enter(&s->s_mtx);
s->s_stat_contended++;
}
s->s_stat_locks++;
return (s);
}
static void
pfsync_tdb_ins(struct pfsync_slice *s, struct tdb *tdb)
{
size_t nlen = sizeof(struct pfsync_tdb);
struct mbuf *m = NULL;
KASSERT(s->s_len >= PFSYNC_MINPKT);
MUTEX_ASSERT_LOCKED(&s->s_mtx);
MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
if (TAILQ_EMPTY(&s->s_tdb_q))
nlen += sizeof(struct pfsync_subheader);
if (s->s_len + nlen > s->s_pfsync->sc_if.if_mtu) {
m = pfsync_slice_write(s);
if (m != NULL) {
s->s_stat_enqueue++;
if (mq_enqueue(&s->s_sendq, m) == 0)
task_add(s->s_softnet, &s->s_send);
}
nlen = sizeof(struct pfsync_subheader) +
sizeof(struct pfsync_tdb);
}
s->s_len += nlen;
TAILQ_INSERT_TAIL(&s->s_tdb_q, tdb, tdb_sync_entry);
tdb->tdb_updates = 0;
if (!timeout_pending(&s->s_tmo))
timeout_add_sec(&s->s_tmo, 1);
}
static void
pfsync_tdb_del(struct pfsync_slice *s, struct tdb *tdb)
{
MUTEX_ASSERT_LOCKED(&s->s_mtx);
MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
s->s_len -= sizeof(struct pfsync_tdb);
if (TAILQ_EMPTY(&s->s_tdb_q))
s->s_len -= sizeof(struct pfsync_subheader);
}
/*
* the reference that pfsync has to a tdb is accounted for by the
* TDBF_PFSYNC flag, not by tdb_ref/tdb_unref. tdb_delete_tdb() is
* called after all other references to a tdb are dropped (with
* tdb_unref) as part of the tdb_free().
*
* tdb_free() needs to wait for pfsync to let go of the tdb though,
* which would be best handled by a reference count, but tdb_free
* needs the NET_LOCK which pfsync is already fighting with. instead
* use the TDBF_PFSYNC_SNAPPED flag to coordinate the pfsync write/drop
* with tdb_free.
*/
void
pfsync_update_tdb(struct tdb *tdb, int output)
{
struct pfsync_softc *sc;
MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL) {
struct pfsync_slice *s = pfsync_slice_enter_tdb(sc, tdb);
/* TDBF_PFSYNC is only changed while the slice mtx is held */
if (!ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
mtx_enter(&tdb->tdb_mtx);
SET(tdb->tdb_flags, TDBF_PFSYNC);
mtx_leave(&tdb->tdb_mtx);
pfsync_tdb_ins(s, tdb);
} else if (++tdb->tdb_updates >= sc->sc_maxupdates)
pfsync_slice_sched(s);
/* XXX no sync timestamp on tdbs to check */
pfsync_slice_leave(sc, s);
}
smr_read_leave();
}
void
pfsync_delete_tdb(struct tdb *tdb)
{
struct pfsync_softc *sc;
MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc != NULL) {
struct pfsync_slice *s = pfsync_slice_enter_tdb(sc, tdb);
/* TDBF_PFSYNC is only changed while the slice mtx is held */
if (ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
pfsync_tdb_del(s, tdb);
mtx_enter(&tdb->tdb_mtx);
CLR(tdb->tdb_flags, TDBF_PFSYNC);
mtx_leave(&tdb->tdb_mtx);
}
pfsync_slice_leave(sc, s);
}
smr_read_leave();
/*
* handle pfsync_slice_drop being called from pfsync_down
* and the smr/slice access above won't work.
*/
mtx_enter(&tdb->tdb_mtx);
SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); /* like a thanos snap */
while (ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
msleep_nsec(&tdb->tdb_updates, &tdb->tdb_mtx, PWAIT,
"tdbfree", INFSLP);
}
mtx_leave(&tdb->tdb_mtx);
}
#endif /* defined(IPSEC) */
struct pfsync_act {
void (*in)(struct pfsync_softc *, const caddr_t,
unsigned int, unsigned int);
size_t len;
};
static void pfsync_in_clr(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_iack(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_upd_c(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_ureq(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_del(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_del_c(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_bus(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_tdb(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_ins(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static void pfsync_in_upd(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
static const struct pfsync_act pfsync_acts[] = {
[PFSYNC_ACT_CLR] =
{ pfsync_in_clr, sizeof(struct pfsync_clr) },
[PFSYNC_ACT_INS_ACK] =
{ pfsync_in_iack, sizeof(struct pfsync_ins_ack) },
[PFSYNC_ACT_UPD_C] =
{ pfsync_in_upd_c, sizeof(struct pfsync_upd_c) },
[PFSYNC_ACT_UPD_REQ] =
{ pfsync_in_ureq, sizeof(struct pfsync_upd_req) },
[PFSYNC_ACT_DEL] =
{ pfsync_in_del, sizeof(struct pfsync_state) },
[PFSYNC_ACT_DEL_C] =
{ pfsync_in_del_c, sizeof(struct pfsync_del_c) },
[PFSYNC_ACT_BUS] =
{ pfsync_in_bus, sizeof(struct pfsync_bus) },
[PFSYNC_ACT_INS] =
{ pfsync_in_ins, sizeof(struct pfsync_state) },
[PFSYNC_ACT_UPD] =
{ pfsync_in_upd, sizeof(struct pfsync_state) },
[PFSYNC_ACT_TDB] =
{ pfsync_in_tdb, sizeof(struct pfsync_tdb) },
};
static void
pfsync_in_skip(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
/* nop */
}
static struct mbuf *
pfsync_input(struct mbuf *m, uint8_t ttl, unsigned int hlen)
{
struct pfsync_softc *sc;
struct pfsync_header *ph;
struct pfsync_subheader *subh;
unsigned int len;
void (*in)(struct pfsync_softc *,
const caddr_t, unsigned int, unsigned int);
pfsyncstat_inc(pfsyncs_ipackets);
if (!pf_status.running)
return (m);
/*
* pfsyncif is only set if it is up and running correctly.
*/
smr_read_enter();
sc = SMR_PTR_GET(&pfsyncif);
if (sc == NULL)
goto leave;
if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
pfsyncstat_inc(pfsyncs_badif);
goto leave;
}
/* verify that the IP TTL is 255. */
if (ttl != PFSYNC_DFLTTL) {
pfsyncstat_inc(pfsyncs_badttl);
goto leave;
}
m_adj(m, hlen);
if (m->m_pkthdr.len < sizeof(*ph)) {
pfsyncstat_inc(pfsyncs_hdrops);
goto leave;
}
if (m->m_len < sizeof(*ph)) {
m = m_pullup(m, sizeof(*ph));
if (m == NULL)
goto leave;
}
ph = mtod(m, struct pfsync_header *);
if (ph->version != PFSYNC_VERSION) {
pfsyncstat_inc(pfsyncs_badver);
goto leave;
}
len = ntohs(ph->len);
if (m->m_pkthdr.len < len) {
pfsyncstat_inc(pfsyncs_badlen);
goto leave;
}
if (m->m_pkthdr.len > len)
m->m_pkthdr.len = len;
/* ok, it's serious now */
refcnt_take(&sc->sc_refs);
smr_read_leave();
counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes, len);
m_adj(m, sizeof(*ph));
while (m->m_pkthdr.len >= sizeof(*subh)) {
unsigned int action, mlen, count;
if (m->m_len < sizeof(*subh)) {
m = m_pullup(m, sizeof(*subh));
if (m == NULL)
goto rele;
}
subh = mtod(m, struct pfsync_subheader *);
action = subh->action;
mlen = subh->len << 2;
count = ntohs(subh->count);
if (action >= PFSYNC_ACT_MAX ||
action >= nitems(pfsync_acts) ||
mlen < pfsync_acts[subh->action].len) {
/*
* subheaders are always followed by at least one
* message, so if the peer is new
* enough to tell us how big its messages are then we
* know enough to skip them.
*/
if (count == 0 || mlen == 0) {
pfsyncstat_inc(pfsyncs_badact);
goto rele;
}
in = pfsync_in_skip;
} else {
in = pfsync_acts[action].in;
if (in == NULL)
in = pfsync_in_skip;
}
m_adj(m, sizeof(*subh));
len = mlen * count;
if (len > m->m_pkthdr.len) {
pfsyncstat_inc(pfsyncs_badlen);
goto rele;
}
if (m->m_len < len) {
m = m_pullup(m, len);
if (m == NULL)
goto rele;
}
(*in)(sc, mtod(m, caddr_t), mlen, count);
m_adj(m, len);
}
rele:
refcnt_rele_wake(&sc->sc_refs);
return (m);
leave:
smr_read_leave();
return (m);
}
static void
pfsync_in_clr(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_clr *clr;
struct pf_state *head, *tail, *st, *next;
struct pfi_kif *kif;
uint32_t creatorid;
unsigned int i;
rw_enter_read(&pf_state_list.pfs_rwl);
/* get a view of the state list */
mtx_enter(&pf_state_list.pfs_mtx);
head = TAILQ_FIRST(&pf_state_list.pfs_list);
tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
mtx_leave(&pf_state_list.pfs_mtx);
PF_LOCK();
for (i = 0; i < count; i++) {
clr = (struct pfsync_clr *)(buf + i * mlen);
creatorid = clr->creatorid;
if (clr->ifname[0] == '\0')
kif = NULL;
else {
kif = pfi_kif_find(clr->ifname);
if (kif == NULL)
continue;
}
st = NULL;
next = head;
PF_STATE_ENTER_WRITE();
while (st != tail) {
st = next;
next = TAILQ_NEXT(st, entry_list);
if (creatorid != st->creatorid)
continue;
if (kif != NULL && kif != st->kif)
continue;
mtx_enter(&st->mtx);
SET(st->state_flags, PFSTATE_NOSYNC);
mtx_leave(&st->mtx);
pf_remove_state(st);
}
PF_STATE_EXIT_WRITE();
}
PF_UNLOCK();
rw_exit_read(&pf_state_list.pfs_rwl);
}
static void
pfsync_in_ins(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_state *sp;
sa_family_t af1, af2;
unsigned int i;
PF_LOCK();
for (i = 0; i < count; i++) {
sp = (struct pfsync_state *)(buf + mlen * i);
af1 = sp->key[0].af;
af2 = sp->key[1].af;
/* check for invalid values */
if (sp->timeout >= PFTM_MAX ||
sp->src.state > PF_TCPS_PROXY_DST ||
sp->dst.state > PF_TCPS_PROXY_DST ||
sp->direction > PF_OUT ||
(((af1 || af2) &&
((af1 != AF_INET && af1 != AF_INET6) ||
(af2 != AF_INET && af2 != AF_INET6))) ||
(sp->af != AF_INET && sp->af != AF_INET6))) {
pfsyncstat_inc(pfsyncs_badval);
continue;
}
if (pf_state_import(sp, PFSYNC_SI_PFSYNC) == ENOMEM) {
/* drop out, but process the rest of the actions */
break;
}
}
PF_UNLOCK();
}
static void
pfsync_in_iack(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_ins_ack *ia;
struct pf_state_cmp id_key;
struct pf_state *st;
unsigned int i;
for (i = 0; i < count; i++) {
ia = (struct pfsync_ins_ack *)(buf + mlen * i);
id_key.id = ia->id;
id_key.creatorid = ia->creatorid;
PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
pf_state_ref(st);
PF_STATE_EXIT_READ();
if (st == NULL)
continue;
if (READ_ONCE(st->sync_defer) != NULL)
pfsync_deferred(sc, st);
pf_state_unref(st);
}
}
static int
pfsync_upd_tcp(struct pf_state *st, const struct pfsync_state_peer *src,
const struct pfsync_state_peer *dst)
{
int sync = 0;
/*
* The state should never go backwards except
* for syn-proxy states. Neither should the
* sequence window slide backwards.
*/
if ((st->src.state > src->state &&
(st->src.state < PF_TCPS_PROXY_SRC ||
src->state >= PF_TCPS_PROXY_SRC)) ||
(st->src.state == src->state &&
SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
sync++;
else
pf_state_peer_ntoh(src, &st->src);
if ((st->dst.state > dst->state) ||
(st->dst.state >= TCPS_SYN_SENT &&
SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
sync++;
else
pf_state_peer_ntoh(dst, &st->dst);
return (sync);
}
static void
pfsync_in_updates(struct pfsync_softc *sc, struct pf_state *st,
const struct pfsync_state_peer *src, const struct pfsync_state_peer *dst,
uint8_t timeout)
{
struct pf_state_scrub *sscrub = NULL;
struct pf_state_scrub *dscrub = NULL;
int sync;
if (src->scrub.scrub_flag && st->src.scrub == NULL) {
sscrub = pf_state_scrub_get();
if (sscrub == NULL) {
/* inc error? */
goto out;
}
}
if (dst->scrub.scrub_flag && st->dst.scrub == NULL) {
dscrub = pf_state_scrub_get();
if (dscrub == NULL) {
/* inc error? */
goto out;
}
}
if (READ_ONCE(st->sync_defer) != NULL)
pfsync_deferred(sc, st);
mtx_enter(&st->mtx);
/* attach the scrub memory if needed */
if (sscrub != NULL && st->src.scrub == NULL) {
st->src.scrub = sscrub;
sscrub = NULL;
}
if (dscrub != NULL && st->dst.scrub == NULL) {
st->dst.scrub = dscrub;
dscrub = NULL;
}
if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
sync = pfsync_upd_tcp(st, src, dst);
else {
sync = 0;
/*
* Non-TCP protocol state machine always go
* forwards
*/
if (st->src.state > src->state)
sync++;
else
pf_state_peer_ntoh(src, &st->src);
if (st->dst.state > dst->state)
sync++;
else
pf_state_peer_ntoh(dst, &st->dst);
}
st->pfsync_time = getuptime();
if (sync < 2) {
st->expire = st->pfsync_time;
st->timeout = timeout;
}
mtx_leave(&st->mtx);
if (sync) {
pfsyncstat_inc(pfsyncs_stale);
pfsync_update_state(st);
}
out:
if (sscrub != NULL)
pf_state_scrub_put(sscrub);
if (dscrub != NULL)
pf_state_scrub_put(dscrub);
}
static void
pfsync_in_upd(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_state *sp;
struct pf_state_cmp id_key;
struct pf_state *st;
int error;
unsigned int i;
for (i = 0; i < count; i++) {
sp = (struct pfsync_state *)(buf + mlen * i);
/* check for invalid values */
if (sp->timeout >= PFTM_MAX ||
sp->src.state > PF_TCPS_PROXY_DST ||
sp->dst.state > PF_TCPS_PROXY_DST) {
pfsyncstat_inc(pfsyncs_badval);
continue;
}
id_key.id = sp->id;
id_key.creatorid = sp->creatorid;
PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
pf_state_ref(st);
PF_STATE_EXIT_READ();
if (st == NULL) {
/* insert the update */
PF_LOCK();
error = pf_state_import(sp, PFSYNC_SI_PFSYNC);
if (error)
pfsyncstat_inc(pfsyncs_badstate);
PF_UNLOCK();
continue;
}
pfsync_in_updates(sc, st, &sp->src, &sp->dst, sp->timeout);
pf_state_unref(st);
}
}
static struct mbuf *
pfsync_upd_req_init(struct pfsync_softc *sc, unsigned int count)
{
struct mbuf *m;
unsigned int mlen;
m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL) {
pfsyncstat_inc(pfsyncs_onomem);
return (NULL);
}
mlen = max_linkhdr + sizeof(sc->sc_template) +
sizeof(struct pfsync_header) +
sizeof(struct pfsync_subheader) +
sizeof(struct pfsync_upd_req) * count;
if (mlen > MHLEN) {
MCLGETL(m, M_DONTWAIT, mlen);
if (!ISSET(m->m_flags, M_EXT)) {
m_freem(m);
return (NULL);
}
}
m_align(m, 0);
m->m_len = 0;
return (m);
}
static void
pfsync_in_upd_c(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_upd_c *up;
struct pf_state_cmp id_key;
struct pf_state *st;
unsigned int i;
struct mbuf *m = NULL;
unsigned int rcount = 0;
for (i = 0; i < count; i++) {
up = (struct pfsync_upd_c *)(buf + mlen * i);
/* check for invalid values */
if (up->timeout >= PFTM_MAX ||
up->src.state > PF_TCPS_PROXY_DST ||
up->dst.state > PF_TCPS_PROXY_DST) {
pfsyncstat_inc(pfsyncs_badval);
continue;
}
id_key.id = up->id;
id_key.creatorid = up->creatorid;
PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
pf_state_ref(st);
PF_STATE_EXIT_READ();
if (st == NULL) {
/* We don't have this state. Ask for it. */
struct pfsync_upd_req *ur;
if (m == NULL) {
m = pfsync_upd_req_init(sc, count);
if (m == NULL) {
pfsyncstat_inc(pfsyncs_onomem);
continue;
}
}
m = m_prepend(m, sizeof(*ur), M_DONTWAIT);
if (m == NULL) {
pfsyncstat_inc(pfsyncs_onomem);
continue;
}
ur = mtod(m, struct pfsync_upd_req *);
ur->id = up->id;
ur->creatorid = up->creatorid;
rcount++;
continue;
}
pfsync_in_updates(sc, st, &up->src, &up->dst, up->timeout);
pf_state_unref(st);
}
if (m != NULL) {
struct pfsync_subheader *subh;
m = m_prepend(m, sizeof(*subh), M_DONTWAIT);
if (m == NULL) {
pfsyncstat_inc(pfsyncs_onomem);
return;
}
subh = mtod(m, struct pfsync_subheader *);
subh->action = PFSYNC_ACT_UPD_REQ;
subh->len = sizeof(struct pfsync_upd_req) >> 2;
subh->count = htons(rcount);
m = pfsync_encap(sc, m);
if (m == NULL) {
pfsyncstat_inc(pfsyncs_onomem);
return;
}
pfsync_sendout(sc, m);
}
}
static void
pfsync_in_ureq(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_upd_req *ur;
struct pf_state_cmp id_key;
struct pf_state *st;
unsigned int i;
for (i = 0; i < count; i++) {
ur = (struct pfsync_upd_req *)(buf + mlen * i);
id_key.id = ur->id;
id_key.creatorid = ur->creatorid;
if (id_key.id == 0 && id_key.creatorid == 0) {
pfsync_bulk_snd_start(sc);
continue;
}
PF_STATE_ENTER_READ();
st = pf_find_state_byid(&id_key);
if (st != NULL && st->timeout < PFTM_MAX &&
!ISSET(st->state_flags, PFSTATE_NOSYNC))
pf_state_ref(st);
else
st = NULL;
PF_STATE_EXIT_READ();
if (st == NULL) {
pfsyncstat_inc(pfsyncs_badstate);
continue;
}
pfsync_update_state_req(sc, st);
pf_state_unref(st);
}
}
static void
pfsync_in_del(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_state *sp;
struct pf_state_cmp id_key;
struct pf_state *st;
unsigned int i;
PF_LOCK();
PF_STATE_ENTER_WRITE();
for (i = 0; i < count; i++) {
sp = (struct pfsync_state *)(buf + mlen * i);
id_key.id = sp->id;
id_key.creatorid = sp->creatorid;
st = pf_find_state_byid(&id_key);
if (st == NULL) {
pfsyncstat_inc(pfsyncs_badstate);
continue;
}
mtx_enter(&st->mtx);
SET(st->state_flags, PFSTATE_NOSYNC);
mtx_leave(&st->mtx);
pf_remove_state(st);
}
PF_STATE_EXIT_WRITE();
PF_UNLOCK();
}
static void
pfsync_in_del_c(struct pfsync_softc *sc,
const caddr_t buf, unsigned int mlen, unsigned int count)
{
const struct pfsync_del_c *sp;
struct pf_state_cmp id_key;
struct pf_state *st;
unsigned int i;
PF_LOCK();
PF_STATE_ENTER_WRITE();
for (i = 0; i < count; i++) {
sp = (struct pfsync_del_c *)(buf + mlen * i);
id_key.id = sp->id;
id_key.creatorid = sp->creatorid;
st = pf_find_state_byid(&id_key);
if (st == NULL) {
pfsyncstat_inc(pfsyncs_badstate);
continue;
}
mtx_enter(&st->mtx);
SET(st->state_flags, PFSTATE_NOSYNC);
mtx_leave(&st->mtx);
pf_remove_state(st);
}
PF_STATE_EXIT_WRITE();
PF_UNLOCK();
}
static void
pfsync_in_bus(struct pfsync_softc *sc,
const caddr_t buf, unsigned int len, unsigned int count)
{
const struct pfsync_bus *bus = (struct pfsync_bus *)buf;
switch (bus->status) {
case PFSYNC_BUS_START:
pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_BUS_START);
break;
case PFSYNC_BUS_END:
pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_BUS_END);
break;
}
}
#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
static void
pfsync_update_net_tdb(const struct pfsync_tdb *pt)
{
struct tdb *tdb;
NET_ASSERT_LOCKED();
/* check for invalid values */
if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
(pt->dst.sa.sa_family != AF_INET &&
pt->dst.sa.sa_family != AF_INET6))
goto bad;
tdb = gettdb(ntohs(pt->rdomain), pt->spi,
(union sockaddr_union *)&pt->dst, pt->sproto);
if (tdb) {
uint64_t rpl = betoh64(pt->rpl);
uint64_t cur_bytes = betoh64(pt->cur_bytes);
/* Neither replay nor byte counter should ever decrease. */
mtx_enter(&tdb->tdb_mtx);
if (rpl >= tdb->tdb_rpl &&
cur_bytes >= tdb->tdb_cur_bytes) {
tdb->tdb_rpl = rpl;
tdb->tdb_cur_bytes = cur_bytes;
}
mtx_leave(&tdb->tdb_mtx);
tdb_unref(tdb);
}
return;
bad:
DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
"invalid value");
pfsyncstat_inc(pfsyncs_badstate);
return;
}
#endif
static void
pfsync_in_tdb(struct pfsync_softc *sc,
const caddr_t buf, unsigned int len, unsigned int count)
{
#if defined(IPSEC)
const struct pfsync_tdb *tp;
unsigned int i;
for (i = 0; i < count; i++) {
tp = (const struct pfsync_tdb *)(buf + len * i);
pfsync_update_net_tdb(tp);
}
#endif
}
int
pfsync_input4(struct mbuf **mp, int *offp, int proto, int af)
{
struct mbuf *m = *mp;
struct ip *ip;
ip = mtod(m, struct ip *);
m = pfsync_input(m, ip->ip_ttl, ip->ip_hl << 2);
m_freem(m);
*mp = NULL;
return (IPPROTO_DONE);
}
int
pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
{
struct pfsyncstats pfsyncstat;
CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
memset(&pfsyncstat, 0, sizeof pfsyncstat);
counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
pfsyncs_ncounters, NULL);
return (sysctl_rdstruct(oldp, oldlenp, newp,
&pfsyncstat, sizeof(pfsyncstat)));
}
int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
/* All sysctl names at this level are terminal. */
if (namelen != 1)
return (ENOTDIR);
switch (name[0]) {
case PFSYNCCTL_STATS:
return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
default:
return (ENOPROTOOPT);
}
}