/* $OpenBSD: pfvar_priv.h,v 1.36 2024/04/22 13:30:22 bluhm Exp $ */
/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 2016 Alexander Bluhm <bluhm@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef _NET_PFVAR_PRIV_H_
#define _NET_PFVAR_PRIV_H_

#ifdef _KERNEL

#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/percpu.h>

/*
 * Locks used to protect struct members in this file:
 *	L	pf_inp_mtx		link pf to inp mutex
 */

struct pfsync_deferral;

/*
 * pf state items - links from pf_state_key to pf_states
 */
struct pf_state_item {
	TAILQ_ENTRY(pf_state_item)
			 si_entry;
	struct pf_state	*si_st;
};

TAILQ_HEAD(pf_statelisthead, pf_state_item);

/*
 * pf state keys - look up states by address
 */
struct pf_state_key {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
	u_int16_t	 rdomain;
	u_int16_t	 hash;
	sa_family_t	 af;
	u_int8_t	 proto;

	RB_ENTRY(pf_state_key)	 sk_entry;
	struct pf_statelisthead	 sk_states;
	struct pf_state_key	*sk_reverse;
	struct inpcb		*sk_inp;	/* [L] */
	pf_refcnt_t		 sk_refcnt;
	u_int8_t		 sk_removed;
};

RBT_HEAD(pf_state_tree, pf_state_key);
RBT_PROTOTYPE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);
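
/*
 * a lookup is an RB-tree search on the address tuple, followed by a
 * walk of the states linked to the matching key.  a minimal sketch,
 * assuming the caller holds PF_STATE_LOCK, "key" has been filled in
 * from the packet, and pf_statetbl is the global tree (inspect() is
 * hypothetical):
 *
 *	struct pf_state_key *sk;
 *	struct pf_state_item *si;
 *
 *	sk = RBT_FIND(pf_state_tree, &pf_statetbl, &key);
 *	if (sk != NULL)
 *		TAILQ_FOREACH(si, &sk->sk_states, si_entry)
 *			inspect(si->si_st);
 */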

#define PF_REVERSED_KEY(key, family)				\
	((key[PF_SK_WIRE]->af != key[PF_SK_STACK]->af) &&	\
	 (key[PF_SK_WIRE]->af != (family)))
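
/*
 * PF_REVERSED_KEY can only be true for af-translated states, where
 * the wire and stack keys carry different address families; it tells
 * the caller that, for the given family, the wire/stack key roles
 * are swapped.  a sketch of a typical use (sks and pd are
 * hypothetical locals):
 *
 *	sk = PF_REVERSED_KEY(sks, pd->af) ?
 *	    sks[PF_SK_STACK] : sks[PF_SK_WIRE];
 */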

/*
 * pf state
 *
 * Protection/ownership of pf_state members:
 *	I	immutable after pf_state_insert()
 *	M	pf_state mtx
 *	P	PF_STATE_LOCK
 *	S	pfsync
 *	L	pf_state_list
 *	g	pf_purge gc
 */
struct pf_state {
	u_int64_t		 id;		/* [I] */
	u_int32_t		 creatorid;	/* [I] */
	u_int8_t		 direction;	/* [I] */
	u_int8_t		 pad[3];

	TAILQ_ENTRY(pf_state)	 sync_list;	/* [S] */
	struct pfsync_deferral	*sync_defer;	/* [S] */
	TAILQ_ENTRY(pf_state)	 entry_list;	/* [L] */
	SLIST_ENTRY(pf_state)	 gc_list;	/* [g] */
	RB_ENTRY(pf_state)	 entry_id;	/* [P] */
	struct pf_state_peer	 src;
	struct pf_state_peer	 dst;
	struct pf_rule_slist	 match_rules;	/* [I] */
	union pf_rule_ptr	 rule;		/* [I] */
	union pf_rule_ptr	 anchor;	/* [I] */
	union pf_rule_ptr	 natrule;	/* [I] */
	struct pf_addr		 rt_addr;	/* [I] */
	struct pf_sn_head	 src_nodes;	/* [I] */
	struct pf_state_key	*key[2];	/* [I] stack and wire */
	struct pfi_kif		*kif;		/* [I] */
	struct mutex		 mtx;
	pf_refcnt_t		 refcnt;
	u_int64_t		 packets[2];
	u_int64_t		 bytes[2];
	int32_t			 creation;	/* [I] */
	int32_t			 expire;
	int32_t			 pfsync_time;	/* [S] */
	int			 rtableid[2];	/* [I] stack and wire */
	u_int16_t		 qid;		/* [I] */
	u_int16_t		 pqid;		/* [I] */
	u_int16_t		 tag;		/* [I] */
	u_int16_t		 state_flags;	/* [M] */
	u_int8_t		 log;		/* [I] */
	u_int8_t		 timeout;
	u_int8_t		 sync_state;	/* [S] PFSYNC_S_x */
	u_int8_t		 sync_updates;	/* [S] */
	u_int8_t		 min_ttl;	/* [I] */
	u_int8_t		 set_tos;	/* [I] */
	u_int8_t		 set_prio[2];	/* [I] */
	u_int16_t		 max_mss;	/* [I] */
	u_int16_t		 if_index_in;	/* [I] */
	u_int16_t		 if_index_out;	/* [I] */
	u_int16_t		 delay;		/* [I] */
	u_int8_t		 rt;		/* [I] */
};

RBT_HEAD(pf_state_tree_id, pf_state);
RBT_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
extern struct pf_state_tree_id tree_id;
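
/*
 * states can also be looked up by their 64-bit id and creator, which
 * is roughly what pf_find_state_byid() in pf.c does (a sketch; the
 * caller holds PF_STATE_LOCK):
 *
 *	struct pf_state key, *st;
 *
 *	key.id = id;
 *	key.creatorid = creatorid;
 *	st = RBT_FIND(pf_state_tree_id, &tree_id, &key);
 */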

/*
 * states are linked into a global list to support the following
 * functionality:
 *
 * - garbage collection
 * - pfsync bulk send operations
 * - bulk state fetches via the DIOCGETSTATES ioctl
 * - bulk state clearing via the DIOCCLRSTATES ioctl
 *
 * a state is inserted into the global pf_state_list once it has also
 * been successfully added to the various trees that make up the state
 * table. states are only removed from the pf_state_list by the garbage
 * collection process.
 *
 * the pf_state_list head and tail pointers (ie, the pfs_list TAILQ_HEAD
 * structure) and the pointers between the entries on the pf_state_list
 * are locked separately. at a high level, this allows for insertion
 * of new states into the pf_state_list while other contexts (eg, the
 * ioctls) are traversing the state items in the list. for garbage
 * collection to remove items from the pf_state_list, it has to exclude
 * both modifications to the list head and tail pointers, and traversal
 * of the links between the states.
 *
 * the head and tail pointers are protected by a mutex. the pointers
 * between states are protected by an rwlock.
 *
 * because insertions are only made to the end of the list, if we get
 * a snapshot of the head and tail of the list and prevent modifications
 * to the links between states, we can safely traverse between the
 * head and tail entries. subsequent insertions can add entries after
 * our view of the tail, but we don't look past our view.
 *
 * if both locks must be taken, the rwlock protecting the links between
 * states is taken before the mutex protecting the head and tail
 * pointers.
 *
 * insertion into the list follows this pattern:
 *
 *	// serialise list head/tail modifications
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	TAILQ_INSERT_TAIL(&pf_state_list.pfs_list, state, entry_list);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 * traversal of the list:
 *
 *	// lock against the gc removing an item from the list
 *	rw_enter_read(&pf_state_list.pfs_rwl);
 *
 *	// get a snapshot view of the ends of the list
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	head = TAILQ_FIRST(&pf_state_list.pfs_list);
 *	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	state = NULL;
 *	next = head;
 *
 *	while (state != tail) {
 *		state = next;
 *		next = TAILQ_NEXT(state, entry_list);
 *
 *		// look at the state
 *	}
 *
 *	rw_exit_read(&pf_state_list.pfs_rwl);
 *
 * removing an item from the list:
 *
 *	// wait for iterators (readers) to get out
 *	rw_enter_write(&pf_state_list.pfs_rwl);
 *
 *	// serialise list head/tail modifications
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	TAILQ_REMOVE(&pf_state_list.pfs_list, state, entry_list);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	rw_exit_write(&pf_state_list.pfs_rwl);
 *
 * the lock ordering for the pf_state_list locks and the rest of the
 * pf locks is:
 *
 * 1. KERNEL_LOCK
 * 2. NET_LOCK
 * 3. pf_state_list.pfs_rwl
 * 4. PF_LOCK
 * 5. PF_STATE_LOCK
 * 6. pf_state_list.pfs_mtx
 */
struct pf_state_list {
	/* the list of states in the system */
	struct pf_state_queue		pfs_list;

	/* serialise pfs_list head/tail access */
	struct mutex			pfs_mtx;

	/* serialise access to pointers between pfs_list entries */
	struct rwlock			pfs_rwl;
};

#define PF_STATE_LIST_INITIALIZER(_pfs) {			\
	.pfs_list	= TAILQ_HEAD_INITIALIZER(_pfs.pfs_list),\
	.pfs_mtx	= MUTEX_INITIALIZER(IPL_SOFTNET),	\
	.pfs_rwl	= RWLOCK_INITIALIZER("pfstates"),	\
}

extern struct rwlock pf_lock;

struct pf_pdesc {
	struct {
		int	 done;
		uid_t	 uid;
		gid_t	 gid;
		pid_t	 pid;
	}		 lookup;
	u_int64_t	 tot_len;	/* Make Mickey money */

	struct pf_addr	 nsaddr;	/* src address after NAT */
	struct pf_addr	 ndaddr;	/* dst address after NAT */
	struct pfi_kif	*kif;		/* incoming interface */
	struct mbuf	*m;		/* mbuf containing the packet */
	struct pf_addr	*src;		/* src address */
	struct pf_addr	*dst;		/* dst address */
	u_int16_t	*pcksum;	/* proto cksum */
	u_int16_t	*sport;
	u_int16_t	*dport;
	u_int16_t	 osport;
	u_int16_t	 odport;
	u_int16_t	 hash;
	u_int16_t	 nsport;	/* src port after NAT */
	u_int16_t	 ndport;	/* dst port after NAT */
	u_int32_t	 off;		/* protocol header offset */
	u_int32_t	 hdrlen;	/* protocol header length */
	u_int32_t	 p_len;		/* length of protocol payload */
	u_int32_t	 extoff;	/* extension header offset */
	u_int32_t	 fragoff;	/* fragment header offset */
	u_int32_t	 jumbolen;	/* length from v6 jumbo header */
	u_int32_t	 badopts;	/* v4 options or v6 routing headers */
#define PF_OPT_OTHER		0x0001
#define PF_OPT_JUMBO		0x0002
#define PF_OPT_ROUTER_ALERT	0x0004

	u_int16_t	 rdomain;	/* original routing domain */
	u_int16_t	 virtual_proto;
#define PF_VPROTO_FRAGMENT	256
	sa_family_t	 af;
	sa_family_t	 naf;
	u_int8_t	 proto;
	u_int8_t	 tos;
	u_int8_t	 ttl;
	u_int8_t	 dir;		/* direction */
	u_int8_t	 sidx;		/* key index for source */
	u_int8_t	 didx;		/* key index for destination */
	u_int8_t	 destchg;	/* flag set when destination changed */
	u_int8_t	 pflog;		/* flags for packet logging */

	union {
		struct tcphdr			tcp;
		struct udphdr			udp;
		struct icmp			icmp;
#ifdef INET6
		struct icmp6_hdr		icmp6;
		struct mld_hdr			mld;
		struct nd_neighbor_solicit	nd_ns;
#endif /* INET6 */
	} hdr;
};
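
/*
 * PF_VPROTO_FRAGMENT deliberately lies outside the 8-bit IP protocol
 * space, so a fragment can never be mistaken for a real protocol.  a
 * sketch of how a pdesc might be marked (the real logic lives in
 * pf_setup_pdesc()):
 *
 *	pd->virtual_proto = pd->proto;
 *	if (pd->fragoff != 0)
 *		pd->virtual_proto = PF_VPROTO_FRAGMENT;
 */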

struct pf_anchor_stackframe {
	struct pf_ruleset	*sf_rs;
	union {
		struct pf_rule			*u_r;
		struct pf_anchor_stackframe	*u_stack_top;
	} u;
	struct pf_anchor	*sf_child;
	int			 sf_jump_target;
};
#define sf_r		u.u_r
#define sf_stack_top	u.u_stack_top

enum {
	PF_NEXT_RULE,
	PF_NEXT_CHILD
};

extern struct cpumem *pf_anchor_stack;
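
/*
 * the anchor stack is per-CPU memory; a user borrows the current
 * CPU's stack for the duration of a rule evaluation.  a minimal
 * sketch, assuming the standard <sys/percpu.h> API:
 *
 *	struct pf_anchor_stackframe *stack;
 *
 *	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 *	...
 *	cpumem_leave(pf_anchor_stack, stack);
 */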

enum pf_trans_type {
	PF_TRANS_NONE,
	PF_TRANS_GETRULE,
	PF_TRANS_MAX
};

struct pf_trans {
	LIST_ENTRY(pf_trans)	pft_entry;
	uint32_t		pft_unit;	/* process id */
	uint64_t		pft_ticket;
	enum pf_trans_type	pft_type;
	union {
		struct {
			u_int32_t		 gr_version;
			struct pf_anchor	*gr_anchor;
			struct pf_rule		*gr_rule;
		} u_getrule;
	} u;
};

#define pftgr_version	u.u_getrule.gr_version
#define pftgr_anchor	u.u_getrule.gr_anchor
#define pftgr_rule	u.u_getrule.gr_rule

extern struct timeout pf_purge_states_to;
extern struct task pf_purge_task;
extern struct timeout pf_purge_to;
struct pf_state *pf_state_ref(struct pf_state *);
void pf_state_unref(struct pf_state *);
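
/*
 * a list traversal that wants to keep using a state after dropping
 * pf_state_list.pfs_rwl must hold a reference across the unlocked
 * section, eg (a sketch; use() is hypothetical):
 *
 *	st = pf_state_ref(st);
 *	rw_exit_read(&pf_state_list.pfs_rwl);
 *	use(st);
 *	pf_state_unref(st);
 *	rw_enter_read(&pf_state_list.pfs_rwl);
 */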
extern struct rwlock pf_lock;
extern struct rwlock pf_state_lock;
extern struct mutex pf_frag_mtx;
extern struct mutex pf_inp_mtx;

#define PF_LOCK()		do {				\
		rw_enter_write(&pf_lock);			\
	} while (0)

#define PF_UNLOCK()		do {				\
		PF_ASSERT_LOCKED();				\
		rw_exit_write(&pf_lock);			\
	} while (0)

#define PF_ASSERT_LOCKED()	do {				\
		if (rw_status(&pf_lock) != RW_WRITE)		\
			splassert_fail(RW_WRITE,		\
			    rw_status(&pf_lock), __func__);	\
	} while (0)

#define PF_ASSERT_UNLOCKED()	do {				\
		if (rw_status(&pf_lock) == RW_WRITE)		\
			splassert_fail(0, rw_status(&pf_lock), __func__);\
	} while (0)

#define PF_STATE_ENTER_READ()	do {				\
		rw_enter_read(&pf_state_lock);			\
	} while (0)

#define PF_STATE_EXIT_READ()	do {				\
		rw_exit_read(&pf_state_lock);			\
	} while (0)

#define PF_STATE_ENTER_WRITE()	do {				\
		rw_enter_write(&pf_state_lock);			\
	} while (0)

#define PF_STATE_EXIT_WRITE()	do {				\
		PF_STATE_ASSERT_LOCKED();			\
		rw_exit_write(&pf_state_lock);			\
	} while (0)

#define PF_STATE_ASSERT_LOCKED()	do {			\
		if (rw_status(&pf_state_lock) != RW_WRITE)	\
			splassert_fail(RW_WRITE,		\
			    rw_status(&pf_state_lock), __func__);\
	} while (0)

#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
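
/*
 * when both the pf lock and the pf state lock are needed, they must
 * be taken in the order documented above and released in reverse:
 *
 *	PF_LOCK();
 *	PF_STATE_ENTER_WRITE();
 *	...
 *	PF_STATE_EXIT_WRITE();
 *	PF_UNLOCK();
 */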

/* for copies to/from network byte order */
void	pf_state_peer_hton(const struct pf_state_peer *,
	    struct pfsync_state_peer *);
void	pf_state_peer_ntoh(const struct pfsync_state_peer *,
	    struct pf_state_peer *);

u_int16_t pf_pkt_hash(sa_family_t, uint8_t,
	    const struct pf_addr *, const struct pf_addr *,
	    uint16_t, uint16_t);
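
/*
 * pf_pkt_hash() maps an address/port tuple to the 16-bit hash kept in
 * pf_pdesc and pf_state_key.  a sketch of a call, with hypothetical
 * locals for the ports:
 *
 *	pd->hash = pf_pkt_hash(pd->af, pd->proto, pd->src, pd->dst,
 *	    sport, dport);
 */
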
#endif /* _KERNEL */
#endif /* _NET_PFVAR_PRIV_H_ */