From 7539b04ed7a462afde4d3aaae7f6ba234a868158 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 22 Aug 2021 22:19:06 +0300 Subject: [PATCH 01/25] ipsec_newpolicies(): do not call key_freesp() with NULL value Sponsored by: NVIDIA networking MFC after: 1 week --- sys/net/if_ipsec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/net/if_ipsec.c b/sys/net/if_ipsec.c index 5f43225bfaed..b503d0696691 100644 --- a/sys/net/if_ipsec.c +++ b/sys/net/if_ipsec.c @@ -901,8 +901,10 @@ ipsec_newpolicies(struct ipsec_softc *sc, struct secpolicy *sp[IPSEC_SPCOUNT], } return (0); fail: - for (i = 0; i < IPSEC_SPCOUNT; i++) - key_freesp(&sp[i]); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + if (sp[i] != NULL) + key_freesp(&sp[i]); + } return (ENOMEM); } From 54ac7b969f8683b1398625cc283eb8383f95b263 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 25 Dec 2022 23:56:45 +0200 Subject: [PATCH 02/25] ipsec: make key_do_allocsp() global Sponsored by: NVIDIA networking --- sys/netipsec/key.c | 1 - sys/netipsec/key.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index a077f8007e47..501f5c0a7339 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -622,7 +622,6 @@ static struct callout key_timer; static void key_unlink(struct secpolicy *); static void key_detach(struct secpolicy *); -static struct secpolicy *key_do_allocsp(struct secpolicyindex *spidx, u_int dir); static struct secpolicy *key_getsp(struct secpolicyindex *); static struct secpolicy *key_getspbyid(u_int32_t); static struct mbuf *key_gather_mbuf(struct mbuf *, diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h index 5c46c00feb39..d62426e6733e 100644 --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -49,6 +49,7 @@ struct xformsw; struct secpolicy *key_newsp(void); struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); +struct secpolicy *key_do_allocsp(struct secpolicyindex *spidx, u_int dir); struct 
secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); int key_sp2msg(struct secpolicy *, void *, size_t *); void key_addref(struct secpolicy *); From 41106f5aa032502f474fb60180a97917b12f6568 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 13 Mar 2023 02:22:21 +0200 Subject: [PATCH 03/25] netipsec/xform_esp.c: make esp_ctr_compatibility global Sponsored by: NVIDIA networking --- sys/netipsec/esp_var.h | 2 ++ sys/netipsec/xform_esp.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sys/netipsec/esp_var.h b/sys/netipsec/esp_var.h index bbcea005fcf3..c1f492913107 100644 --- a/sys/netipsec/esp_var.h +++ b/sys/netipsec/esp_var.h @@ -74,6 +74,8 @@ struct espstat { #include VNET_DECLARE(int, esp_enable); +VNET_DECLARE(int, esp_ctr_compatibility); +#define V_esp_ctr_compatibility VNET(esp_ctr_compatibility) VNET_PCPUSTAT_DECLARE(struct espstat, espstat); #define ESPSTAT_ADD(name, val) \ diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c index 7ad6085db87f..599134f056ca 100644 --- a/sys/netipsec/xform_esp.c +++ b/sys/netipsec/xform_esp.c @@ -83,8 +83,7 @@ #define SPI_SIZE 4 VNET_DEFINE(int, esp_enable) = 1; -VNET_DEFINE_STATIC(int, esp_ctr_compatibility) = 1; -#define V_esp_ctr_compatibility VNET(esp_ctr_compatibility) +VNET_DEFINE(int, esp_ctr_compatibility) = 1; VNET_PCPUSTAT_DEFINE(struct espstat, espstat); VNET_PCPUSTAT_SYSINIT(espstat); From de1da299daaa4d26f5a4aba733d9b2880dc0be59 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 25 Jan 2023 12:54:47 +0200 Subject: [PATCH 04/25] ipsec_output(): add outgoing ifp argument The information about the interface is needed to coordinate inline offloading of IPSEC processing with the corresponding driver. 
Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44223 --- sys/net/if_ipsec.c | 4 +-- sys/netinet/ip_output.c | 2 +- sys/netinet6/ip6_output.c | 2 +- sys/netipsec/ipsec.h | 5 +-- sys/netipsec/ipsec6.h | 5 +-- sys/netipsec/ipsec_output.c | 64 ++++++++++++++++++++---------------- sys/netipsec/ipsec_support.h | 11 ++++--- sys/netipsec/subr_ipsec.c | 4 +-- 8 files changed, 54 insertions(+), 43 deletions(-) diff --git a/sys/net/if_ipsec.c b/sys/net/if_ipsec.c index b503d0696691..bdf500431eff 100644 --- a/sys/net/if_ipsec.c +++ b/sys/net/if_ipsec.c @@ -415,12 +415,12 @@ ipsec_transmit(struct ifnet *ifp, struct mbuf *m) switch (af) { #ifdef INET case AF_INET: - error = ipsec4_process_packet(m, sp, NULL); + error = ipsec4_process_packet(ifp, m, sp, NULL); break; #endif #ifdef INET6 case AF_INET6: - error = ipsec6_process_packet(m, sp, NULL); + error = ipsec6_process_packet(ifp, m, sp, NULL); break; #endif default: diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 28fb651a0bc9..77708f84c3e9 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -673,7 +673,7 @@ sendit: error = ENOBUFS; goto bad; } - if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { + if ((error = IPSEC_OUTPUT(ipv4, ifp, m, inp)) != 0) { if (error == EINPROGRESS) error = 0; goto done; diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 530f86c36689..800fa691062f 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -462,7 +462,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, error = ENOBUFS; goto bad; } - if ((error = IPSEC_OUTPUT(ipv6, m, inp)) != 0) { + if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp)) != 0) { if (error == EINPROGRESS) error = 0; goto done; diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 88594d250fdb..a90953531b99 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -336,8 +336,9 @@ void ipsec_setspidx_inpcb(struct inpcb *, struct secpolicyindex *, 
u_int); void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); -int ipsec4_check_pmtu(struct mbuf *, struct secpolicy *, int); -int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec4_check_pmtu(struct ifnet *, struct mbuf *, struct secpolicy *, int); +int ipsec4_process_packet(struct ifnet *, struct mbuf *, struct secpolicy *, + struct inpcb *); int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, u_int); diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index 3adb332aeb73..9c5d6e695417 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -66,8 +66,9 @@ struct secpolicy *ipsec6_checkpolicy(const struct mbuf *, void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); -int ipsec6_check_pmtu(struct mbuf *, struct secpolicy *, int); -int ipsec6_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec6_check_pmtu(struct ifnet *, struct mbuf *, struct secpolicy *, int); +int ipsec6_process_packet(struct ifnet *, struct mbuf *, struct secpolicy *, + struct inpcb *); int ip6_ipsec_filtertunnel(struct mbuf *); int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 707fe3421c97..08b6289ec1d5 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -110,7 +110,8 @@ static size_t ipsec_get_pmtu(struct secasvar *sav); #ifdef INET static struct secasvar * -ipsec4_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +ipsec4_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + u_int *pidx, int *error) { struct secasindex *saidx, tmpsaidx; struct ipsecrequest *isr; @@ -186,7 +187,7 @@ next: * IPsec output logic for IPv4. 
*/ static int -ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, +ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, struct inpcb *inp, u_int idx) { struct ipsec_ctx_data ctx; @@ -206,7 +207,7 @@ ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, * determine next transform. At the end of transform we can * release reference to SP. */ - sav = ipsec4_allocsa(m, sp, &idx, &error); + sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ key_freesp(&sp); @@ -288,15 +289,16 @@ bad: } int -ipsec4_process_packet(struct mbuf *m, struct secpolicy *sp, +ipsec4_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, struct inpcb *inp) { - return (ipsec4_perform_request(m, sp, inp, 0)); + return (ipsec4_perform_request(ifp, m, sp, inp, 0)); } int -ipsec4_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) +ipsec4_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + int forwarding) { struct secasvar *sav; struct ip *ip; @@ -317,7 +319,7 @@ ipsec4_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) setdf: idx = sp->tcount - 1; - sav = ipsec4_allocsa(m, sp, &idx, &error); + sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { key_freesp(&sp); /* @@ -368,7 +370,8 @@ setdf: } static int -ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, + int forwarding) { struct secpolicy *sp; int error; @@ -412,7 +415,7 @@ ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) #endif } /* NB: callee frees mbuf and releases reference to SP */ - error = ipsec4_check_pmtu(m, sp, forwarding); + error = ipsec4_check_pmtu(ifp, m, sp, forwarding); if (error != 0) { if (error == EJUSTRETURN) return (0); @@ -420,7 +423,7 @@ ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) return 
(error); } - error = ipsec4_process_packet(m, sp, inp); + error = ipsec4_process_packet(ifp, m, sp, inp); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -440,7 +443,7 @@ ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) * other values - mbuf consumed by IPsec. */ int -ipsec4_output(struct mbuf *m, struct inpcb *inp) +ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp) { /* @@ -451,7 +454,7 @@ ipsec4_output(struct mbuf *m, struct inpcb *inp) if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) return (0); - return (ipsec4_common_output(m, inp, 0)); + return (ipsec4_common_output(ifp, m, inp, 0)); } /* @@ -471,7 +474,7 @@ ipsec4_forward(struct mbuf *m) m_freem(m); return (EACCES); } - return (ipsec4_common_output(m, NULL, 1)); + return (ipsec4_common_output(NULL /* XXXKIB */, m, NULL, 1)); } #endif @@ -491,7 +494,8 @@ in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, } static struct secasvar * -ipsec6_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +ipsec6_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + u_int *pidx, int *error) { struct secasindex *saidx, tmpsaidx; struct ipsecrequest *isr; @@ -579,7 +583,7 @@ next: * IPsec output logic for IPv6. 
*/ static int -ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, +ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, struct inpcb *inp, u_int idx) { struct ipsec_ctx_data ctx; @@ -590,7 +594,7 @@ ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); - sav = ipsec6_allocsa(m, sp, &idx, &error); + sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ key_freesp(&sp); @@ -671,18 +675,19 @@ bad: } int -ipsec6_process_packet(struct mbuf *m, struct secpolicy *sp, +ipsec6_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, struct inpcb *inp) { - return (ipsec6_perform_request(m, sp, inp, 0)); + return (ipsec6_perform_request(ifp, m, sp, inp, 0)); } /* * IPv6 implementation is based on IPv4 implementation. */ int -ipsec6_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) +ipsec6_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + int forwarding) { struct secasvar *sav; size_t hlen, pmtu; @@ -699,7 +704,7 @@ ipsec6_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) return (0); idx = sp->tcount - 1; - sav = ipsec6_allocsa(m, sp, &idx, &error); + sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { key_freesp(&sp); /* @@ -745,7 +750,8 @@ ipsec6_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) } static int -ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, + int forwarding) { struct secpolicy *sp; int error; @@ -779,7 +785,7 @@ ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) #endif } - error = ipsec6_check_pmtu(m, sp, forwarding); + error = ipsec6_check_pmtu(ifp, m, sp, forwarding); if (error != 0) { if (error == EJUSTRETURN) return (0); @@ -788,7 +794,7 @@ 
ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) } /* NB: callee frees mbuf and releases reference to SP */ - error = ipsec6_process_packet(m, sp, inp); + error = ipsec6_process_packet(ifp, m, sp, inp); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -808,7 +814,7 @@ ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) * other values - mbuf consumed by IPsec. */ int -ipsec6_output(struct mbuf *m, struct inpcb *inp) +ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp) { /* @@ -819,7 +825,7 @@ ipsec6_output(struct mbuf *m, struct inpcb *inp) if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) return (0); - return (ipsec6_common_output(m, inp, 0)); + return (ipsec6_common_output(ifp, m, inp, 0)); } /* @@ -839,7 +845,7 @@ ipsec6_forward(struct mbuf *m) m_freem(m); return (EACCES); } - return (ipsec6_common_output(m, NULL, 1)); + return (ipsec6_common_output(NULL /* XXXKIB */, m, NULL, 1)); } #endif /* INET6 */ @@ -916,14 +922,16 @@ ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, case AF_INET: key_freesav(&sav); IPSECSTAT_INC(ips_out_bundlesa); - return (ipsec4_perform_request(m, sp, NULL, idx)); + return (ipsec4_perform_request(NULL, m, sp, NULL, + idx)); /* NOTREACHED */ #endif #ifdef INET6 case AF_INET6: key_freesav(&sav); IPSEC6STAT_INC(ips_out_bundlesa); - return (ipsec6_perform_request(m, sp, NULL, idx)); + return (ipsec6_perform_request(NULL, m, sp, NULL, + idx)); /* NOTREACHED */ #endif /* INET6 */ default: diff --git a/sys/netipsec/ipsec_support.h b/sys/netipsec/ipsec_support.h index b7be62104d12..96d753f48f42 100644 --- a/sys/netipsec/ipsec_support.h +++ b/sys/netipsec/ipsec_support.h @@ -29,6 +29,7 @@ #ifdef _KERNEL #if defined(IPSEC) || defined(IPSEC_SUPPORT) +struct ifnet; struct mbuf; struct inpcb; struct tcphdr; @@ -58,7 +59,7 @@ int ipsec4_in_reject(const struct mbuf *, struct inpcb *); int ipsec4_input(struct mbuf *, int, 
int); int ipsec4_forward(struct mbuf *); int ipsec4_pcbctl(struct inpcb *, struct sockopt *); -int ipsec4_output(struct mbuf *, struct inpcb *); +int ipsec4_output(struct ifnet *, struct mbuf *, struct inpcb *); int ipsec4_capability(struct mbuf *, u_int); int ipsec4_ctlinput(ipsec_ctlinput_param_t); #endif /* INET */ @@ -68,7 +69,7 @@ int ipsec6_input(struct mbuf *, int, int); int ipsec6_in_reject(const struct mbuf *, struct inpcb *); int ipsec6_forward(struct mbuf *); int ipsec6_pcbctl(struct inpcb *, struct sockopt *); -int ipsec6_output(struct mbuf *, struct inpcb *); +int ipsec6_output(struct ifnet *, struct mbuf *, struct inpcb *); int ipsec6_capability(struct mbuf *, u_int); int ipsec6_ctlinput(ipsec_ctlinput_param_t); #endif /* INET6 */ @@ -77,7 +78,7 @@ struct ipsec_methods { int (*input)(struct mbuf *, int, int); int (*check_policy)(const struct mbuf *, struct inpcb *); int (*forward)(struct mbuf *); - int (*output)(struct mbuf *, struct inpcb *); + int (*output)(struct ifnet *, struct mbuf *, struct inpcb *); int (*pcbctl)(struct inpcb *, struct sockopt *); size_t (*hdrsize)(struct inpcb *); int (*capability)(struct mbuf *, u_int); @@ -187,8 +188,8 @@ int ipsec_kmod_input(struct ipsec_support * const, struct mbuf *, int, int); int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, struct inpcb *); int ipsec_kmod_forward(struct ipsec_support * const, struct mbuf *); -int ipsec_kmod_output(struct ipsec_support * const, struct mbuf *, - struct inpcb *); +int ipsec_kmod_output(struct ipsec_support * const, struct ifnet *, + struct mbuf *, struct inpcb *); int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, struct sockopt *); int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); diff --git a/sys/netipsec/subr_ipsec.c b/sys/netipsec/subr_ipsec.c index a1eb8f220525..46b3439908ce 100644 --- a/sys/netipsec/subr_ipsec.c +++ b/sys/netipsec/subr_ipsec.c @@ -369,8 +369,8 @@ IPSEC_KMOD_METHOD(int, 
ipsec_kmod_ctlinput, sc, ) IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, - output, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, - struct inpcb *inp), METHOD_ARGS(m, inp) + output, METHOD_DECL(struct ipsec_support * const sc, struct ifnet *ifp, + struct mbuf *m, struct inpcb *inp), METHOD_ARGS(ifp, m, inp) ) IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, sc, From 00524fd475995d30780ce80ec75e085223206cac Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 30 Jan 2023 19:56:00 +0200 Subject: [PATCH 05/25] ipsec_output(): add mtu argument Similarly, mtu is needed to decide inline IPSEC offload for the driver. Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44224 --- sys/net/if_ipsec.c | 4 ++-- sys/netinet/ip_output.c | 2 +- sys/netinet6/ip6_output.c | 2 +- sys/netipsec/ipsec.h | 2 +- sys/netipsec/ipsec6.h | 2 +- sys/netipsec/ipsec_output.c | 36 ++++++++++++++++++------------------ sys/netipsec/ipsec_support.h | 9 +++++---- sys/netipsec/subr_ipsec.c | 7 ++++--- 8 files changed, 33 insertions(+), 31 deletions(-) diff --git a/sys/net/if_ipsec.c b/sys/net/if_ipsec.c index bdf500431eff..849127f75de6 100644 --- a/sys/net/if_ipsec.c +++ b/sys/net/if_ipsec.c @@ -415,12 +415,12 @@ ipsec_transmit(struct ifnet *ifp, struct mbuf *m) switch (af) { #ifdef INET case AF_INET: - error = ipsec4_process_packet(ifp, m, sp, NULL); + error = ipsec4_process_packet(ifp, m, sp, NULL, ifp->if_mtu); break; #endif #ifdef INET6 case AF_INET6: - error = ipsec6_process_packet(ifp, m, sp, NULL); + error = ipsec6_process_packet(ifp, m, sp, NULL, ifp->if_mtu); break; #endif default: diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 77708f84c3e9..770a95dae659 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -673,7 +673,7 @@ sendit: error = ENOBUFS; goto bad; } - if ((error = IPSEC_OUTPUT(ipv4, ifp, m, inp)) != 0) { + if ((error = IPSEC_OUTPUT(ipv4, ifp, m, inp, mtu)) != 0) { if (error == EINPROGRESS) error 
= 0; goto done; diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 800fa691062f..68dd376af5d0 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -462,7 +462,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, error = ENOBUFS; goto bad; } - if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp)) != 0) { + if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp, mtu)) != 0) { if (error == EINPROGRESS) error = 0; goto done; diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index a90953531b99..2a1dcb8bb77b 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -338,7 +338,7 @@ void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); int ipsec4_check_pmtu(struct ifnet *, struct mbuf *, struct secpolicy *, int); int ipsec4_process_packet(struct ifnet *, struct mbuf *, struct secpolicy *, - struct inpcb *); + struct inpcb *, u_long); int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, u_int); diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index 9c5d6e695417..a7410733c43f 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -68,7 +68,7 @@ void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); int ipsec6_check_pmtu(struct ifnet *, struct mbuf *, struct secpolicy *, int); int ipsec6_process_packet(struct ifnet *, struct mbuf *, struct secpolicy *, - struct inpcb *); + struct inpcb *, u_long); int ip6_ipsec_filtertunnel(struct mbuf *); int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 08b6289ec1d5..be996f257b64 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -188,7 +188,7 @@ next: */ static int ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp, u_int 
idx) + struct inpcb *inp, u_int idx, u_long mtu) { struct ipsec_ctx_data ctx; union sockaddr_union *dst; @@ -290,10 +290,10 @@ bad: int ipsec4_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp) + struct inpcb *inp, u_long mtu) { - return (ipsec4_perform_request(ifp, m, sp, inp, 0)); + return (ipsec4_perform_request(ifp, m, sp, inp, 0, mtu)); } int @@ -371,7 +371,7 @@ setdf: static int ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, - int forwarding) + int forwarding, u_long mtu) { struct secpolicy *sp; int error; @@ -423,7 +423,7 @@ ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, return (error); } - error = ipsec4_process_packet(ifp, m, sp, inp); + error = ipsec4_process_packet(ifp, m, sp, inp, mtu); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -443,7 +443,7 @@ ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, * other values - mbuf consumed by IPsec. 
*/ int -ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp) +ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, u_long mtu) { /* @@ -454,7 +454,7 @@ ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp) if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) return (0); - return (ipsec4_common_output(ifp, m, inp, 0)); + return (ipsec4_common_output(ifp, m, inp, 0, mtu)); } /* @@ -474,7 +474,7 @@ ipsec4_forward(struct mbuf *m) m_freem(m); return (EACCES); } - return (ipsec4_common_output(NULL /* XXXKIB */, m, NULL, 1)); + return (ipsec4_common_output(NULL /* XXXKIB */, m, NULL, 1, 0)); } #endif @@ -584,7 +584,7 @@ next: */ static int ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp, u_int idx) + struct inpcb *inp, u_int idx, u_long mtu) { struct ipsec_ctx_data ctx; union sockaddr_union *dst; @@ -676,10 +676,10 @@ bad: int ipsec6_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp) + struct inpcb *inp, u_long mtu) { - return (ipsec6_perform_request(ifp, m, sp, inp, 0)); + return (ipsec6_perform_request(ifp, m, sp, inp, 0, mtu)); } /* @@ -751,7 +751,7 @@ ipsec6_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, static int ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, - int forwarding) + int forwarding, u_long mtu) { struct secpolicy *sp; int error; @@ -794,7 +794,7 @@ ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, } /* NB: callee frees mbuf and releases reference to SP */ - error = ipsec6_process_packet(ifp, m, sp, inp); + error = ipsec6_process_packet(ifp, m, sp, inp, mtu); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -814,7 +814,7 @@ ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, * other values - mbuf consumed by IPsec. 
*/ int -ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp) +ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, u_long mtu) { /* @@ -825,7 +825,7 @@ ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp) if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) return (0); - return (ipsec6_common_output(ifp, m, inp, 0)); + return (ipsec6_common_output(ifp, m, inp, 0, mtu)); } /* @@ -845,7 +845,7 @@ ipsec6_forward(struct mbuf *m) m_freem(m); return (EACCES); } - return (ipsec6_common_output(NULL /* XXXKIB */, m, NULL, 1)); + return (ipsec6_common_output(NULL /* XXXKIB */, m, NULL, 1, 0)); } #endif /* INET6 */ @@ -923,7 +923,7 @@ ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, key_freesav(&sav); IPSECSTAT_INC(ips_out_bundlesa); return (ipsec4_perform_request(NULL, m, sp, NULL, - idx)); + idx, 0)); /* NOTREACHED */ #endif #ifdef INET6 @@ -931,7 +931,7 @@ ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, key_freesav(&sav); IPSEC6STAT_INC(ips_out_bundlesa); return (ipsec6_perform_request(NULL, m, sp, NULL, - idx)); + idx, 0)); /* NOTREACHED */ #endif /* INET6 */ default: diff --git a/sys/netipsec/ipsec_support.h b/sys/netipsec/ipsec_support.h index 96d753f48f42..0cbfe6ddfee5 100644 --- a/sys/netipsec/ipsec_support.h +++ b/sys/netipsec/ipsec_support.h @@ -59,7 +59,7 @@ int ipsec4_in_reject(const struct mbuf *, struct inpcb *); int ipsec4_input(struct mbuf *, int, int); int ipsec4_forward(struct mbuf *); int ipsec4_pcbctl(struct inpcb *, struct sockopt *); -int ipsec4_output(struct ifnet *, struct mbuf *, struct inpcb *); +int ipsec4_output(struct ifnet *, struct mbuf *, struct inpcb *, u_long); int ipsec4_capability(struct mbuf *, u_int); int ipsec4_ctlinput(ipsec_ctlinput_param_t); #endif /* INET */ @@ -69,7 +69,7 @@ int ipsec6_input(struct mbuf *, int, int); int ipsec6_in_reject(const struct mbuf *, struct inpcb *); int ipsec6_forward(struct mbuf *); int 
ipsec6_pcbctl(struct inpcb *, struct sockopt *); -int ipsec6_output(struct ifnet *, struct mbuf *, struct inpcb *); +int ipsec6_output(struct ifnet *, struct mbuf *, struct inpcb *, u_long); int ipsec6_capability(struct mbuf *, u_int); int ipsec6_ctlinput(ipsec_ctlinput_param_t); #endif /* INET6 */ @@ -78,7 +78,8 @@ struct ipsec_methods { int (*input)(struct mbuf *, int, int); int (*check_policy)(const struct mbuf *, struct inpcb *); int (*forward)(struct mbuf *); - int (*output)(struct ifnet *, struct mbuf *, struct inpcb *); + int (*output)(struct ifnet *, struct mbuf *, struct inpcb *, + u_long); int (*pcbctl)(struct inpcb *, struct sockopt *); size_t (*hdrsize)(struct inpcb *); int (*capability)(struct mbuf *, u_int); @@ -189,7 +190,7 @@ int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, struct inpcb *); int ipsec_kmod_forward(struct ipsec_support * const, struct mbuf *); int ipsec_kmod_output(struct ipsec_support * const, struct ifnet *, - struct mbuf *, struct inpcb *); + struct mbuf *, struct inpcb *, u_long); int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, struct sockopt *); int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); diff --git a/sys/netipsec/subr_ipsec.c b/sys/netipsec/subr_ipsec.c index 46b3439908ce..2f7e0f09882e 100644 --- a/sys/netipsec/subr_ipsec.c +++ b/sys/netipsec/subr_ipsec.c @@ -368,9 +368,10 @@ IPSEC_KMOD_METHOD(int, ipsec_kmod_ctlinput, sc, ipsec_ctlinput_param_t param), METHOD_ARGS(param) ) -IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, - output, METHOD_DECL(struct ipsec_support * const sc, struct ifnet *ifp, - struct mbuf *m, struct inpcb *inp), METHOD_ARGS(ifp, m, inp) +IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, output, + METHOD_DECL(struct ipsec_support * const sc, struct ifnet *ifp, + struct mbuf *m, struct inpcb *inp, u_long mtu), + METHOD_ARGS(ifp, m, inp, mtu) ) IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, sc, From 3673801f53065529092ea22ecc2a07f306b984f2 Mon 
Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 7 Feb 2024 03:48:00 +0200 Subject: [PATCH 06/25] Define mbuf tags for IPSEC_ACCEL in/out with ABI_COMPAT The tags are used to pass the information about SA to/from the driver doing inline IPSEC offload. Sponsored by: NVIDIA networking --- sys/sys/mbuf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 6e8454ea62d6..ab494a76833e 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1386,6 +1386,8 @@ extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ #define PACKET_TAG_PF_REASSEMBLED 31 +#define PACKET_TAG_IPSEC_ACCEL_OUT 32 /* IPSEC accel out */ +#define PACKET_TAG_IPSEC_ACCEL_IN 33 /* IPSEC accel in */ /* Specific cookies and tags. */ From da0efbdb8d952a1e9d15a5e332149e1f1092f5d9 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 25 Jan 2023 13:03:51 +0200 Subject: [PATCH 07/25] ip6_output: place IPSEC_OUTPUT hook after the outgoing ifp is calculated To be able to pass ifp and mtu to the ipsec_output() and ipsec accelerator filter. Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44225 --- sys/netinet6/ip6_output.c | 42 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 68dd376af5d0..7eea64bb6344 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -449,27 +449,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, #endif } -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - /* - * IPSec checking which handles several cases. - * FAST IPSEC: We re-injected the packet. - * XXX: need scope argument. 
- */ - if (IPSEC_ENABLED(ipv6)) { - m = mb_unmapped_to_ext(m); - if (m == NULL) { - IP6STAT_INC(ip6s_odropped); - error = ENOBUFS; - goto bad; - } - if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp, mtu)) != 0) { - if (error == EINPROGRESS) - error = 0; - goto done; - } - } -#endif /* IPSEC */ - /* Source address validation. */ ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && @@ -806,6 +785,27 @@ nonh6lookup: KASSERT((ifp != NULL), ("output interface must not be NULL")); KASSERT((origifp != NULL), ("output address interface must not be NULL")); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * IPSec checking which handles several cases. + * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. + */ + if (IPSEC_ENABLED(ipv6)) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + IP6STAT_INC(ip6s_odropped); + error = ENOBUFS; + goto bad; + } + if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp, mtu)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } + } +#endif /* IPSEC */ + if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); From b256ff9303ba6457584de4fe695f8b0322cd5fdc Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 7 Feb 2024 03:49:14 +0200 Subject: [PATCH 08/25] sys/pfkeyv2.h: define extensions for ipsec inline accel control The extensions allow to restrict interface where SP or SA are offloaded, and to receive software and hardware offload counters for given SA. 
Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44316 --- sys/net/pfkeyv2.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sys/net/pfkeyv2.h b/sys/net/pfkeyv2.h index a0246c9223ba..8df7536f7563 100644 --- a/sys/net/pfkeyv2.h +++ b/sys/net/pfkeyv2.h @@ -296,6 +296,13 @@ struct sadb_x_sa_replay { }; _Static_assert(sizeof(struct sadb_x_sa_replay) == 8, "struct size mismatch"); +struct sadb_x_if_hw_offl { + u_int16_t sadb_x_if_hw_offl_len; + u_int16_t sadb_x_if_hw_offl_exttype; + u_int32_t sadb_x_if_hw_offl_flags; + u_int8_t sadb_x_if_hw_offl_if[32]; /* IF_NAMESIZE is 16, keep room */ +}; + #define SADB_EXT_RESERVED 0 #define SADB_EXT_SA 1 #define SADB_EXT_LIFETIME_CURRENT 2 @@ -326,7 +333,10 @@ _Static_assert(sizeof(struct sadb_x_sa_replay) == 8, "struct size mismatch"); #define SADB_X_EXT_SA_REPLAY 26 /* Replay window override. */ #define SADB_X_EXT_NEW_ADDRESS_SRC 27 #define SADB_X_EXT_NEW_ADDRESS_DST 28 -#define SADB_EXT_MAX 28 +#define SADB_X_EXT_LFT_CUR_SW_OFFL 29 +#define SADB_X_EXT_LFT_CUR_HW_OFFL 30 +#define SADB_X_EXT_IF_HW_OFFL 31 +#define SADB_EXT_MAX 31 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 From 1b1cd327d905fab5920e7af2582d15d48ad26999 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 4 Jun 2023 01:56:19 +0300 Subject: [PATCH 09/25] setkey(8): print out SA sw/hw offload counters Sponsored by: NVIDIA networking --- lib/libipsec/pfkey.c | 2 ++ lib/libipsec/pfkey_dump.c | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/libipsec/pfkey.c b/lib/libipsec/pfkey.c index 31464d063241..2308dd3281a7 100644 --- a/lib/libipsec/pfkey.c +++ b/lib/libipsec/pfkey.c @@ -1713,6 +1713,8 @@ pfkey_align(struct sadb_msg *msg, caddr_t *mhp) case SADB_X_EXT_SA_REPLAY: case SADB_X_EXT_NEW_ADDRESS_SRC: case SADB_X_EXT_NEW_ADDRESS_DST: + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: mhp[ext->sadb_ext_type] = (caddr_t)ext; 
break; default: diff --git a/lib/libipsec/pfkey_dump.c b/lib/libipsec/pfkey_dump.c index cd5961822751..2e69277fe6c2 100644 --- a/lib/libipsec/pfkey_dump.c +++ b/lib/libipsec/pfkey_dump.c @@ -201,7 +201,7 @@ pfkey_sadump(struct sadb_msg *m) caddr_t mhp[SADB_EXT_MAX + 1]; struct sadb_sa *m_sa; struct sadb_x_sa2 *m_sa2; - struct sadb_lifetime *m_lftc, *m_lfth, *m_lfts; + struct sadb_lifetime *m_lftc, *m_lfth, *m_lfts, *m_lft_sw, *m_lft_hw; struct sadb_address *m_saddr, *m_daddr, *m_paddr; struct sadb_key *m_auth, *m_enc; struct sadb_ident *m_sid, *m_did; @@ -240,7 +240,8 @@ pfkey_sadump(struct sadb_msg *m) natt_dport = (struct sadb_x_nat_t_port *)mhp[SADB_X_EXT_NAT_T_DPORT]; natt_oai = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAI]; natt_oar = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAR]; - + m_lft_sw = (struct sadb_lifetime *)mhp[SADB_X_EXT_LFT_CUR_SW_OFFL]; + m_lft_hw = (struct sadb_lifetime *)mhp[SADB_X_EXT_LFT_CUR_HW_OFFL]; /* source address */ if (m_saddr == NULL) { @@ -381,7 +382,23 @@ pfkey_sadump(struct sadb_msg *m) /* XXX DEBUG */ printf("refcnt=%u\n", m->sadb_msg_reserved); - return; + if (m_lft_sw != NULL) { + printf("\tsw offl use: %s", + str_time(m_lft_sw->sadb_lifetime_usetime)); + printf("\tsw offl allocated: %lu", + (unsigned long)m_lft_sw->sadb_lifetime_allocations); + str_lifetime_byte(m_lft_sw, "sw offl"); + printf("\n"); + } + + if (m_lft_hw != NULL) { + printf("\thw offl use: %s", + str_time(m_lft_hw->sadb_lifetime_usetime)); + printf("\thw offl allocated: %lu", + (unsigned long)m_lft_hw->sadb_lifetime_allocations); + str_lifetime_byte(m_lft_hw, "hw offl"); + printf("\n"); + } } void From 3d95e9e3fe24f598a37c30e0418c49cfcec1b502 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 4 Sep 2023 21:32:27 +0300 Subject: [PATCH 10/25] libipsec: decode SADB_X_EXT_IF_HW_OFFL Sponsored by: NVIDIA networking --- lib/libipsec/pfkey.c | 4 +++- lib/libipsec/pfkey_dump.c | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1
deletion(-) diff --git a/lib/libipsec/pfkey.c b/lib/libipsec/pfkey.c index 2308dd3281a7..d5a033c4c501 100644 --- a/lib/libipsec/pfkey.c +++ b/lib/libipsec/pfkey.c @@ -1677,7 +1677,8 @@ pfkey_align(struct sadb_msg *msg, caddr_t *mhp) /* duplicate check */ /* XXX Are there duplication either KEY_AUTH or KEY_ENCRYPT ?*/ - if (mhp[ext->sadb_ext_type] != NULL) { + if (mhp[ext->sadb_ext_type] != NULL && + ext->sadb_ext_type != SADB_X_EXT_IF_HW_OFFL /* XXXKIB */) { __ipsec_errcode = EIPSEC_INVAL_EXTTYPE; return -1; } @@ -1715,6 +1716,7 @@ pfkey_align(struct sadb_msg *msg, caddr_t *mhp) case SADB_X_EXT_NEW_ADDRESS_DST: case SADB_X_EXT_LFT_CUR_SW_OFFL: case SADB_X_EXT_LFT_CUR_HW_OFFL: + case SADB_X_EXT_IF_HW_OFFL: mhp[ext->sadb_ext_type] = (caddr_t)ext; break; default: diff --git a/lib/libipsec/pfkey_dump.c b/lib/libipsec/pfkey_dump.c index 2e69277fe6c2..8dcc21be16e2 100644 --- a/lib/libipsec/pfkey_dump.c +++ b/lib/libipsec/pfkey_dump.c @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -210,6 +211,10 @@ pfkey_sadump(struct sadb_msg *m) struct sadb_x_nat_t_type *natt_type; struct sadb_x_nat_t_port *natt_sport, *natt_dport; struct sadb_address *natt_oai, *natt_oar; + struct sadb_x_if_hw_offl *if_hw_offl; + caddr_t p, ep; + struct sadb_ext *ext; + bool first; /* check pfkey message. 
*/ if (pfkey_align(m, mhp)) { @@ -242,6 +247,7 @@ pfkey_sadump(struct sadb_msg *m) natt_oar = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAR]; m_lft_sw = (struct sadb_lifetime *)mhp[SADB_X_EXT_LFT_CUR_SW_OFFL]; m_lft_hw = (struct sadb_lifetime *)mhp[SADB_X_EXT_LFT_CUR_HW_OFFL]; + if_hw_offl = (struct sadb_x_if_hw_offl *)mhp[SADB_X_EXT_IF_HW_OFFL]; /* source address */ if (m_saddr == NULL) { @@ -333,6 +339,27 @@ pfkey_sadump(struct sadb_msg *m) GETMSGSTR(str_state, m_sa->sadb_sa_state); printf("\n"); + /* hw offload interface */ + if (if_hw_offl != NULL) { + p = (caddr_t)m; + ep = p + PFKEY_UNUNIT64(m->sadb_msg_len); + p += sizeof(struct sadb_msg); + printf("\thw offl if: "); + + for (first = true; p < ep; p += PFKEY_EXTLEN(ext)) { + ext = (struct sadb_ext *)p; + if (ext->sadb_ext_type != SADB_X_EXT_IF_HW_OFFL) + continue; + if_hw_offl = (struct sadb_x_if_hw_offl *)ext; + if (first) + first = false; + else + printf(","); + printf("%s", if_hw_offl->sadb_x_if_hw_offl_if); + } + printf("\n"); + } + /* lifetime */ if (m_lftc != NULL) { time_t tmp_time = time(0); From 9b93b2d8b039e24d8e2505a6aaf8bc1f29863052 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 7 Sep 2023 15:04:49 +0300 Subject: [PATCH 11/25] setkey(8): add -esn extension option to enable ESN Sponsored by: NVIDIA networking --- sbin/setkey/parse.y | 10 ++++++++++ sbin/setkey/setkey.8 | 2 ++ sbin/setkey/token.l | 1 + 3 files changed, 13 insertions(+) diff --git a/sbin/setkey/parse.y b/sbin/setkey/parse.y index 448a8ee5278c..27a0109db333 100644 --- a/sbin/setkey/parse.y +++ b/sbin/setkey/parse.y @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -68,6 +69,7 @@ u_int p_natt_type; struct addrinfo *p_natt_oai, *p_natt_oar; int p_natt_sport, p_natt_dport; int p_natt_fraglen; +bool esn; static int p_aiflags = 0, p_aifamily = PF_UNSPEC; @@ -115,6 +117,7 @@ extern void yyerror(const char *); %token SPDADD SPDDELETE SPDDUMP SPDFLUSH %token F_POLICY PL_REQUESTS %token F_AIFLAGS 
F_NATT F_NATT_MTU +%token F_ESN %token TAGGED %type prefix protocol_spec upper_spec @@ -539,6 +542,11 @@ extension { p_natt_fraglen = $2; } + | F_ESN + { + esn = true; + p_ext |= SADB_X_SAFLAGS_ESN; + } ; /* definition about command for SPD management */ @@ -1355,6 +1363,8 @@ parse_init(void) p_natt_oai = p_natt_oar = NULL; p_natt_sport = p_natt_dport = 0; p_natt_fraglen = -1; + + esn = false; } void diff --git a/sbin/setkey/setkey.8 b/sbin/setkey/setkey.8 index 88b4dc6fc91f..23a838f76541 100644 --- a/sbin/setkey/setkey.8 +++ b/sbin/setkey/setkey.8 @@ -341,6 +341,8 @@ symbols are part of the syntax for the ports specification, not indication of the optional components. .It Fl natt_mtu Ar fragsize Configure NAT-T fragment size. +.It Fl esn +Enable Extended Sequence Number extension for this SA. .El .\" .Pp diff --git a/sbin/setkey/token.l b/sbin/setkey/token.l index 054a57ef1015..b96eaf93924c 100644 --- a/sbin/setkey/token.l +++ b/sbin/setkey/token.l @@ -187,6 +187,7 @@ nocyclic-seq { return(NOCYCLICSEQ); } {hyphen}ls { return(F_LIFETIME_SOFT); } {hyphen}natt { return(F_NATT); } {hyphen}natt_mtu { return(F_NATT_MTU); } +{hyphen}esn { return(F_ESN); } /* ... 
*/ any { return(ANY); } From 83418c878b75fafd5f9bfc44baf049487ce99a86 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 18 Sep 2023 05:04:45 +0300 Subject: [PATCH 12/25] setkey(8): add -hwif extension to specify offload interface for SA and SPD Sponsored by: NVIDIA networking --- sbin/setkey/parse.y | 49 +++++++++++++++++++++++++++++++++++++++++++-- sbin/setkey/token.l | 1 + 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/sbin/setkey/parse.y b/sbin/setkey/parse.y index 27a0109db333..a7bcd2d8dafc 100644 --- a/sbin/setkey/parse.y +++ b/sbin/setkey/parse.y @@ -70,6 +70,7 @@ struct addrinfo *p_natt_oai, *p_natt_oar; int p_natt_sport, p_natt_dport; int p_natt_fraglen; bool esn; +vchar_t p_hwif; static int p_aiflags = 0, p_aifamily = PF_UNSPEC; @@ -117,7 +118,7 @@ extern void yyerror(const char *); %token SPDADD SPDDELETE SPDDUMP SPDFLUSH %token F_POLICY PL_REQUESTS %token F_AIFLAGS F_NATT F_NATT_MTU -%token F_ESN +%token F_ESN F_HWIF %token TAGGED %type prefix protocol_spec upper_spec @@ -547,12 +548,16 @@ extension esn = true; p_ext |= SADB_X_SAFLAGS_ESN; } + | F_HWIF STRING + { + p_hwif = $2; + } ; /* definition about command for SPD management */ /* spdadd */ spdadd_command - : SPDADD ipaddropts STRING prefix portstr STRING prefix portstr upper_spec upper_misc_spec policy_spec EOT + : SPDADD ipaddropts STRING prefix portstr STRING prefix portstr upper_spec upper_misc_spec policy_spec spd_hwif EOT { int status; struct addrinfo *src, *dst; @@ -656,6 +661,14 @@ ipaddropts | ipaddropts ipaddropt ; +spd_hwif + : + | F_HWIF STRING + { + p_hwif = $2; + } + ; + ipaddropt : F_AIFLAGS { @@ -839,6 +852,7 @@ setkeymsg_spdaddr(unsigned type, unsigned upper, vchar_t *policy, char buf[BUFSIZ]; int l, l0; struct sadb_address m_addr; + struct sadb_x_if_hw_offl m_if_hw; struct addrinfo *s, *d; int n; int plen; @@ -857,6 +871,20 @@ setkeymsg_spdaddr(unsigned type, unsigned upper, vchar_t *policy, memcpy(buf + l, policy->buf, policy->len); l += policy->len; + 
if (p_hwif.len != 0) { + l0 = sizeof(struct sadb_x_if_hw_offl); + m_if_hw.sadb_x_if_hw_offl_len = PFKEY_UNIT64(l0); + m_if_hw.sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + m_if_hw.sadb_x_if_hw_offl_flags = 0; + memset(&m_if_hw.sadb_x_if_hw_offl_if[0], 0, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + strlcpy(&m_if_hw.sadb_x_if_hw_offl_if[0], p_hwif.buf, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + + memcpy(buf + l, &m_if_hw, l0); + l += l0; + } + l0 = l; n = 0; @@ -1048,6 +1076,7 @@ setkeymsg_add(unsigned type, unsigned satype, struct addrinfo *srcs, struct sadb_x_nat_t_type m_natt_type; struct sadb_x_nat_t_port m_natt_port; struct sadb_x_nat_t_frag m_natt_frag; + struct sadb_x_if_hw_offl m_if_hw; int n; int plen; struct sockaddr *sa; @@ -1264,6 +1293,20 @@ setkeymsg_add(unsigned type, unsigned satype, struct addrinfo *srcs, } } + if (p_hwif.len != 0) { + len = sizeof(struct sadb_x_if_hw_offl); + m_if_hw.sadb_x_if_hw_offl_len = PFKEY_UNIT64(len); + m_if_hw.sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + m_if_hw.sadb_x_if_hw_offl_flags = 0; + memset(&m_if_hw.sadb_x_if_hw_offl_if[0], 0, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + strlcpy(&m_if_hw.sadb_x_if_hw_offl_if[0], p_hwif.buf, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + + memcpy(buf + l, &m_if_hw, len); + l += len; + } + if (n == 0) return -1; else @@ -1365,6 +1408,8 @@ parse_init(void) p_natt_fraglen = -1; esn = false; + p_hwif.len = 0; + p_hwif.buf = NULL; } void diff --git a/sbin/setkey/token.l b/sbin/setkey/token.l index b96eaf93924c..65756f0fd12c 100644 --- a/sbin/setkey/token.l +++ b/sbin/setkey/token.l @@ -188,6 +188,7 @@ nocyclic-seq { return(NOCYCLICSEQ); } {hyphen}natt { return(F_NATT); } {hyphen}natt_mtu { return(F_NATT_MTU); } {hyphen}esn { return(F_ESN); } +{hyphen}hwif { return(F_HWIF); } /* ... 
*/ any { return(ANY); } From 2131654bde1f91b04c959b388cffbf825a433d27 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 7 Feb 2024 03:51:21 +0200 Subject: [PATCH 13/25] sys/net: Add IPSEC_OFFLOAD interface cap and methods structure Reviewed by: glebius Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44314 --- sys/net/if.c | 7 +++++++ sys/net/if.h | 4 +++- sys/net/if_private.h | 2 ++ sys/net/if_strings.h | 3 +++ sys/net/if_var.h | 30 ++++++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/sys/net/if.c b/sys/net/if.c index ee8fe533f338..604a93aa7cba 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2392,6 +2392,7 @@ const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = { const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = { CAP2NV(RXTLS4), CAP2NV(RXTLS6), + CAP2NV(IPSEC_OFFLOAD), {0, NULL} }; #undef CAPNV @@ -5149,6 +5150,12 @@ if_getl2com(if_t ifp) return (ifp->if_l2com); } +void +if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *m) +{ + ifp->if_ipsec_accel_m = m; +} + #ifdef DDB static void if_show_ifnet(struct ifnet *ifp) diff --git a/sys/net/if.h b/sys/net/if.h index cbd69b4912ed..5c4b0637b25a 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -255,7 +255,8 @@ struct if_data { #define IFCAP_B_TXTLS_RTLMT 31 /* can do TLS with rate limiting */ #define IFCAP_B_RXTLS4 32 /* can to TLS receive for TCP */ #define IFCAP_B_RXTLS6 33 /* can to TLS receive for TCP6 */ -#define __IFCAP_B_SIZE 34 +#define IFCAP_B_IPSEC_OFFLOAD 34 /* inline IPSEC offload */ +#define __IFCAP_B_SIZE 35 #define IFCAP_B_MAX (__IFCAP_B_MAX - 1) #define IFCAP_B_SIZE (__IFCAP_B_SIZE) @@ -298,6 +299,7 @@ struct if_data { /* IFCAP2_* are integers, not bits. 
*/ #define IFCAP2_RXTLS4 (IFCAP_B_RXTLS4 - 32) #define IFCAP2_RXTLS6 (IFCAP_B_RXTLS6 - 32) +#define IFCAP2_IPSEC_OFFLOAD (IFCAP_B_IPSEC_OFFLOAD - 32) #define IFCAP2_BIT(x) (1UL << (x)) diff --git a/sys/net/if_private.h b/sys/net/if_private.h index 1aaf9d217f0d..3da529e6b22e 100644 --- a/sys/net/if_private.h +++ b/sys/net/if_private.h @@ -138,6 +138,8 @@ struct ifnet { int (*if_requestencap) /* make link header from request */ (struct ifnet *, struct if_encap_req *); + const struct if_ipsec_accel_methods *if_ipsec_accel_m; + /* Statistics. */ counter_u64_t if_counters[IFCOUNTERS]; diff --git a/sys/net/if_strings.h b/sys/net/if_strings.h index bea15cfa9de5..a127fa273a8b 100644 --- a/sys/net/if_strings.h +++ b/sys/net/if_strings.h @@ -60,9 +60,11 @@ #define IFCAP_TXTLS_RTLMT_NAME "TXTLS_RTLMT" #define IFCAP_RXTLS4_NAME "RXTLS4" #define IFCAP_RXTLS6_NAME "RXTLS6" +#define IFCAP_IPSEC_OFFLOAD_NAME "IPSEC" #define IFCAP2_RXTLS4_NAME IFCAP_RXTLS4_NAME #define IFCAP2_RXTLS6_NAME IFCAP_RXTLS6_NAME +#define IFCAP2_IPSEC_OFFLOAD_NAME IFCAP_IPSEC_OFFLOAD_NAME static const char *ifcap_bit_names[] = { IFCAP_RXCSUM_NAME, @@ -99,6 +101,7 @@ static const char *ifcap_bit_names[] = { IFCAP_TXTLS_RTLMT_NAME, IFCAP_RXTLS4_NAME, IFCAP_RXTLS6_NAME, + IFCAP_IPSEC_OFFLOAD_NAME, }; #ifdef IFCAP_B_SIZE diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 1b9e158a1b29..3e094dcb3cd5 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -131,6 +131,23 @@ typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); typedef void (*if_reassign_fn_t)(if_t, struct vnet *, char *); +typedef int (*if_spdadd_fn_t)(if_t, void *sp, void *inp, void **priv); +typedef int (*if_spddel_fn_t)(if_t, void *sp, void *priv); +typedef int (*if_sa_newkey_fn_t)(if_t ifp, void *sav, u_int drv_spi, + void **privp); +typedef int (*if_sa_deinstall_fn_t)(if_t ifp, u_int drv_spi, void *priv); +struct seclifetime; +#define 
IF_SA_CNT_UPD 0x80000000 +enum IF_SA_CNT_WHICH { + IF_SA_CNT_IFP_HW_VAL = 1, + IF_SA_CNT_TOTAL_SW_VAL, + IF_SA_CNT_TOTAL_HW_VAL, + IF_SA_CNT_IFP_HW_UPD = IF_SA_CNT_IFP_HW_VAL | IF_SA_CNT_UPD, + IF_SA_CNT_TOTAL_SW_UPD = IF_SA_CNT_TOTAL_SW_VAL | IF_SA_CNT_UPD, + IF_SA_CNT_TOTAL_HW_UPD = IF_SA_CNT_TOTAL_HW_VAL | IF_SA_CNT_UPD, +}; +typedef int (*if_sa_cnt_fn_t)(if_t ifp, void *sa, + uint32_t drv_spi, void *priv, struct seclifetime *lt); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ @@ -700,6 +717,19 @@ void if_setdebugnet_methods(if_t, struct debugnet_methods *); void if_setreassignfn(if_t ifp, if_reassign_fn_t); void if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t); +/* + * NB: The interface is not yet stable, drivers implementing IPSEC + * offload need to be prepared to adapt to changes. + */ +struct if_ipsec_accel_methods { + if_spdadd_fn_t if_spdadd; + if_spddel_fn_t if_spddel; + if_sa_newkey_fn_t if_sa_newkey; + if_sa_deinstall_fn_t if_sa_deinstall; + if_sa_cnt_fn_t if_sa_cnt; +}; +void if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *); + /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); From 9d269938e300a99481e4d28d3a49122201f9bc4f Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 31 Mar 2023 01:49:08 +0300 Subject: [PATCH 14/25] ipsec_offload: ifconfig support Sponsored by: NVIDIA networking --- sbin/ifconfig/ifconfig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index 615de5d4ae14..e6ed9015b34b 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -2144,6 +2144,8 @@ static struct cmd basic_cmds[] = { setifcapnv), DEF_CMD_SARG("-rxtls", "-"IFCAP2_RXTLS4_NAME ",-" IFCAP2_RXTLS6_NAME, setifcapnv), + DEF_CMD_SARG("ipsec", IFCAP2_IPSEC_OFFLOAD_NAME, setifcapnv), + DEF_CMD_SARG("-ipsec", "-"IFCAP2_IPSEC_OFFLOAD_NAME, 
setifcapnv), DEF_CMD("wol", IFCAP_WOL, setifcap), DEF_CMD("-wol", IFCAP_WOL, clearifcap), DEF_CMD("wol_ucast", IFCAP_WOL_UCAST, setifcap), From b6919741b7479fab6886ae76ec151f4103bcf350 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 14 Nov 2023 03:45:49 +0200 Subject: [PATCH 15/25] ipsec_offload: handle TSO if supported Allow for TSO to operate if network interface supports ipsec inline offload and supports TSO over it. Reviewed by: tuexen Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44222 --- sys/netinet/tcp_input.c | 2 ++ sys/netinet/tcp_output.c | 20 +++++++++----------- sys/netinet/tcp_subr.c | 4 ++++ sys/netinet/tcp_var.h | 2 ++ 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 7faa815fc1ac..833a1e501780 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -3979,6 +3979,8 @@ tcp_mss(struct tcpcb *tp, int offer) tp->t_tsomax = cap.tsomax; tp->t_tsomaxsegcount = cap.tsomaxsegcount; tp->t_tsomaxsegsize = cap.tsomaxsegsize; + if (cap.ipsec_tso) + tp->t_flags2 |= TF2_IPSEC_TSO; } } diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index c318e8517c2e..080dabf3232f 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -201,9 +201,7 @@ tcp_default_output(struct tcpcb *tp) struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen, ulen; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; -#endif int idle, sendalot, curticks; int sack_rxmit, sack_bytes_rxmt; struct sackhole *p; @@ -553,15 +551,15 @@ after_sack_rexmit: offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; -#endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && ((sack_rxmit == 0) || V_tcp_sack_tso) && - ipoptlen == 0 
&& !(flags & TH_SYN)) + (ipoptlen == 0 || (ipoptlen == ipsec_optlen && + (tp->t_flags2 & TF2_IPSEC_TSO) != 0)) && + !(flags & TH_SYN)) tso = 1; if (SEQ_LT((sack_rxmit ? p->rxmit : tp->snd_nxt) + len, @@ -917,7 +915,7 @@ send: * overflowing or exceeding the maximum length * allowed by the network interface: */ - KASSERT(ipoptlen == 0, + KASSERT(ipoptlen == ipsec_optlen, ("%s: TSO can't do IP options", __func__)); /* @@ -926,8 +924,8 @@ send: */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ - max_len = (if_hw_tsomax - hdrlen - - max_linkhdr); + max_len = if_hw_tsomax - hdrlen - + ipsec_optlen - max_linkhdr; if (max_len <= 0) { len = 0; } else if (len > max_len) { @@ -941,7 +939,7 @@ send: * fractional unless the send sockbuf can be * emptied: */ - max_len = (tp->t_maxseg - optlen); + max_len = tp->t_maxseg - optlen - ipsec_optlen; if (((uint32_t)off + (uint32_t)len) < sbavail(&so->so_snd)) { moff = len % max_len; @@ -1393,10 +1391,10 @@ send: * The TCP pseudo header checksum is always provided. 
*/ if (tso) { - KASSERT(len > tp->t_maxseg - optlen, + KASSERT(len > tp->t_maxseg - optlen - ipsec_optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; - m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; + m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen - ipsec_optlen; } KASSERT(len + hdrlen == m_length(m, NULL), diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 3d860d0cf6f9..b4f605534d59 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -3349,6 +3349,9 @@ tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + /* XXXKIB IFCAP2_IPSEC_OFFLOAD_TSO */ + cap->ipsec_tso = (ifp->if_capenable2 & + IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) != 0; } } } @@ -3388,6 +3391,7 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + cap->ipsec_tso = false; /* XXXKIB */ } } } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index e81ebf301c8e..8330966c2c3f 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -844,6 +844,7 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */ #define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */ #define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */ +#define TF2_IPSEC_TSO 0x00200000 /* IPSEC + TSO supported */ /* * Structure to hold TCP options that are only used during segment @@ -1430,6 +1431,7 @@ struct tcp_ifcap { u_int tsomax; u_int tsomaxsegcount; u_int tsomaxsegsize; + bool ipsec_tso; }; uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); From ef2a572bf6bdcac97ef29ce631d2f50f938e1ec8 Mon Sep 17 00:00:00 2001 From: 
Konstantin Belousov Date: Sun, 22 Aug 2021 22:38:04 +0300 Subject: [PATCH 16/25] ipsec_offload: kernel infrastructure Inline IPSEC offload moves almost whole IPSEC processing from the CPU/MCU and possibly crypto accelerator, to the network card. The transmitted packet content is not touched by CPU during TX operations, kernel only does the required policy and security association lookups to find out that given flow is offloaded, and then packet is transmitted as plain text to the card. For driver convenience, metadata is attached to the packet identifying SA which must process the packet. Card does encryption of the payload, padding, calculates authentication, and does the reformat according to the policy. Similarly, on receive, card does the decapsulation, decryption, and authentication. Kernel receives the identifier of SA that was used to process the packet, together with the plain-text packet. Overall, payload octets are only read or written by card DMA engine, removing a lot of memory subsystem overhead, and saving CPU time because IPSEC algos calculations are avoided. If driver declares support for inline IPSEC offload (with the IFCAP2_IPSEC_OFFLOAD capability set and registering method table struct if_ipsec_accel_methods), kernel offers the SPD and SAD to driver. Driver decides which policies and SAs can be offloaded based on hardware capacity, and acks/nacks each SA for given interface to kernel. Kernel needs to keep this information to make a decision to skip software processing on TX, and to assume processing already done on RX. This shadow SPD/SAD database of offloads is rooted from policies (struct secpolicy accel_ifps, struct ifp_handle_sp) and SAs (struct secasvar accel_ifps, struct ifp_handle_sav). Some extensions to the PF_KEY socket allow to limit interfaces for which given SP/SA could be offloaded (proposed for offload). Also, additional statistics extensions allow to observe allocation/octet/use counters for specific SA.
Since SPs and SAs are typically instantiated in non-sleepable context, while offloading them into card is expected to require costly async manipulations of the card state, calls to the driver for offload and termination are executed in the threaded taskqueue. It also solves the issue of allocating resources needed for the offload database. Neither ifp_handle_sp nor ifp_handle_sav adds a reference to the owning SP/SA, so the offload must be terminated before the last reference is dropped. ipsec_accel only adds transient references to ensure safe pointer ownership by taskqueue. Maintaining the SA counters for hardware-accelerated packets is the duty of the driver. The helper ipsec_accel_drv_sa_lifetime_update() is provided to hide accel infrastructure from drivers which would use expected callout to query hardware periodically for updates. Reviewed by: rscheff (transport, stack integration), np Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44219 --- sys/conf/files | 2 + sys/conf/options | 1 + sys/modules/ipsec/Makefile | 5 +- sys/netipsec/ipsec.c | 17 + sys/netipsec/ipsec.h | 11 + sys/netipsec/ipsec_input.c | 11 + sys/netipsec/ipsec_offload.c | 1061 ++++++++++++++++++++++++++++++++++ sys/netipsec/ipsec_offload.h | 191 ++++++ sys/netipsec/ipsec_output.c | 15 + sys/netipsec/ipsec_pcb.c | 42 +- sys/netipsec/key.c | 270 ++++++++- sys/netipsec/key.h | 6 + sys/netipsec/key_debug.c | 5 + sys/netipsec/keydb.h | 14 + 14 files changed, 1630 insertions(+), 21 deletions(-) create mode 100644 sys/netipsec/ipsec_offload.c create mode 100644 sys/netipsec/ipsec_offload.h diff --git a/sys/conf/files b/sys/conf/files index 609ac407d400..1f99c3586b86 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4464,6 +4464,8 @@ netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mod.c optional ipsec inet | ipsec inet6
+netipsec/ipsec_offload.c optional ipsec ipsec_offload inet | \ + ipsec ipsec_offload inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/ipsec_pcb.c optional ipsec inet | ipsec inet6 | \ ipsec_support inet | ipsec_support inet6 diff --git a/sys/conf/options b/sys/conf/options index f50d009987bc..928927fe99df 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -466,6 +466,7 @@ IPFIREWALL_PMOD opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h +IPSEC_OFFLOAD opt_ipsec.h IPSTEALTH KERN_TLS KRPC diff --git a/sys/modules/ipsec/Makefile b/sys/modules/ipsec/Makefile index 08a2e88d5794..8979508375a4 100644 --- a/sys/modules/ipsec/Makefile +++ b/sys/modules/ipsec/Makefile @@ -2,8 +2,9 @@ .PATH: ${SRCTOP}/sys/net ${SRCTOP}/sys/netipsec KMOD= ipsec -SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ - ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ +SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c \ + ipsec_mod.c ipsec_offload.c ipsec_output.c \ + xform_ah.c xform_esp.c xform_ipcomp.c \ opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_sctp.h .if "${MK_INET}" != "no" || "${MK_INET6}" != "no" SRCS+= udpencap.c diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 0ca33424bca8..e22a3872d48d 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -85,6 +85,7 @@ #ifdef INET6 #include #endif +#include #include #include #include /*XXX*/ @@ -636,8 +637,16 @@ int ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); @@ -802,8 +811,16 @@ int ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; 
+#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 2a1dcb8bb77b..55cc0839eab9 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -71,6 +71,12 @@ struct ipsecrequest { u_int level; /* IPsec level defined below. */ }; +struct ipsec_accel_adddel_sp_tq { + struct vnet *adddel_vnet; + struct task adddel_task; + int adddel_scheduled; +}; + /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; @@ -102,6 +108,11 @@ struct secpolicy { time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ + CK_LIST_HEAD(, ifp_handle_sp) accel_ifps; + struct ipsec_accel_adddel_sp_tq accel_add_tq; + struct ipsec_accel_adddel_sp_tq accel_del_tq; + struct inpcb *ipsec_accel_add_sp_inp; + const char *accel_ifname; }; /* diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 1150f3f470d3..dbb20748cf45 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -237,6 +238,11 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) int ipsec4_input(struct mbuf *m, int offset, int proto) { + int error; + + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); switch (proto) { case IPPROTO_AH: @@ -536,7 +542,12 @@ ipsec6_lasthdr(int proto) int ipsec6_input(struct mbuf *m, int offset, int proto) { + int error; + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); + switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c new file 
mode 100644 index 000000000000..851bacaf4ea1 --- /dev/null +++ b/sys/netipsec/ipsec_offload.c @@ -0,0 +1,1061 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef IPSEC_OFFLOAD + +static struct mtx ipsec_accel_sav_tmp; +static struct unrhdr *drv_spi_unr; +static struct mtx ipsec_accel_cnt_lock; + +struct ipsec_accel_install_newkey_tq { + struct secasvar *sav; + struct vnet *install_vnet; + struct task install_task; +}; + +struct ipsec_accel_forget_tq { + struct vnet *forget_vnet; + struct task forget_task; + struct secasvar *sav; +}; + +struct ifp_handle_sav { + CK_LIST_ENTRY(ifp_handle_sav) sav_link; + CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; + struct secasvar *sav; + struct ifnet *ifp; + void *ifdata; + uint64_t drv_spi; + uint32_t flags; + size_t hdr_ext_size; + uint64_t cnt_octets; + uint64_t cnt_allocs; +}; + +#define IFP_HS_HANDLED 0x00000001 +#define IFP_HS_REJECTED 0x00000002 +#define IFP_HS_INPUT 0x00000004 +#define IFP_HS_OUTPUT 0x00000008 +#define IFP_HS_MARKER 0x00000010 + +static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles; + +struct ifp_handle_sp { + CK_LIST_ENTRY(ifp_handle_sp) sp_link; + CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; + struct secpolicy *sp; + struct ifnet *ifp; + void *ifdata; + uint32_t flags; +}; + +#define IFP_HP_HANDLED 0x00000001 +#define IFP_HP_REJECTED 0x00000002 +#define IFP_HP_MARKER 0x00000004 + +static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles; + +static void * +drvspi_sa_trie_alloc(struct pctrie *ptree) +{ + void *res; + + res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT); + if (res != NULL) + pctrie_zone_init(res, 0, 0); + return (res); +} + +static void +drvspi_sa_trie_free(struct pctrie *ptree, void *node) +{ + free(node, M_IPSEC_MISC); +} + 
+PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi, + drvspi_sa_trie_alloc, drvspi_sa_trie_free); +static struct pctrie drv_spi_pctrie; + +static void ipsec_accel_sa_newkey_impl(struct secasvar *sav); +static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires); +static void ipsec_accel_forget_sav_clear(struct secasvar *sav); +static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, + struct ifnet *ifp); +static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m); +static void ipsec_accel_sync_imp(void); +static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav); +static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav); + +static void +ipsec_accel_init(void *arg) +{ + mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0); + mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0); + drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN, + IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp); + ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl; + ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl; + ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl; + ipsec_accel_spddel_p = ipsec_accel_spddel_impl; + ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl; + ipsec_accel_sync_p = ipsec_accel_sync_imp; + ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl; + ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl; + pctrie_init(&drv_spi_pctrie); +} +SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_init, NULL); + +static void +ipsec_accel_fini(void *arg) +{ + ipsec_accel_sa_newkey_p = NULL; + ipsec_accel_forget_sav_p = NULL; + ipsec_accel_spdadd_p = NULL; + ipsec_accel_spddel_p = NULL; + 
ipsec_accel_sa_lifetime_op_p = NULL; + ipsec_accel_sync_p = NULL; + ipsec_accel_is_accel_sav_p = NULL; + ipsec_accel_key_setaccelif_p = NULL; + ipsec_accel_sync_imp(); + clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ + clear_unrhdr(drv_spi_unr); + delete_unrhdr(drv_spi_unr); + mtx_destroy(&ipsec_accel_sav_tmp); + mtx_destroy(&ipsec_accel_cnt_lock); +} +SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_fini, NULL); + +static void +ipsec_accel_alloc_forget_tq(struct secasvar *sav) +{ + void *ftq; + + if (sav->accel_forget_tq != 0) + return; + + ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK); + if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq)) + free(ftq, M_TEMP); +} + +static bool +ipsec_accel_sa_install_match(if_t ifp, void *arg) +{ + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) + return (false); + if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) { + printf("driver bug ifp %s if_sa_newkey NULL\n", + if_name(ifp)); + return (false); + } + return (true); +} + +static int +ipsec_accel_sa_newkey_cb(if_t ifp, void *arg) +{ + struct ipsec_accel_install_newkey_tq *tq; + void *priv; + u_int drv_spi; + int error; + + tq = arg; + + printf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x " + "flags %#x seq %d\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey, + be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq); + priv = NULL; + drv_spi = alloc_unr(drv_spi_unr); + if (tq->sav->accel_ifname != NULL && + strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + goto out; + } + if (drv_spi == -1) { + /* XXXKIB */ + printf("ipsec_accel_sa_install_newkey: cannot alloc " + "drv_spi if %s spi %#x\n", if_name(ifp), + be32toh(tq->sav->spi)); + return (ENOMEM); + } + error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav, + drv_spi, &priv); + if (error != 0) { + if (error == EOPNOTSUPP) { + 
printf("ipsec_accel_sa_newkey: driver " + "refused sa if %s spi %#x\n", + if_name(ifp), be32toh(tq->sav->spi)); + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); + /* XXXKIB */ + } else { + printf("ipsec_accel_sa_newkey: driver " + "error %d if %s spi %#x\n", + error, if_name(ifp), be32toh(tq->sav->spi)); + /* XXXKIB */ + } + } else { + error = ipsec_accel_handle_sav(tq->sav, ifp, + drv_spi, priv, IFP_HS_HANDLED, NULL); + if (error != 0) { + /* XXXKIB */ + printf("ipsec_accel_sa_newkey: handle_sav " + "err %d if %s spi %#x\n", error, + if_name(ifp), be32toh(tq->sav->spi)); + } + } +out: + return (error); +} + +static void +ipsec_accel_sa_newkey_act(void *context, int pending) +{ + struct ipsec_accel_install_newkey_tq *tq; + void *tqf; + struct secasvar *sav; + + tq = context; + tqf = NULL; + sav = tq->sav; + CURVNET_SET(tq->install_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) == 0 && + sav->state == SADB_SASTATE_MATURE) { + sav->accel_flags |= SADB_KEY_ACCEL_INST; + mtx_unlock(&ipsec_accel_sav_tmp); + if_foreach_sleep(ipsec_accel_sa_install_match, context, + ipsec_accel_sa_newkey_cb, context); + ipsec_accel_alloc_forget_tq(sav); + mtx_lock(&ipsec_accel_sav_tmp); + + /* + * If ipsec_accel_forget_sav() raced with us and set + * the flag, do its work. Its task cannot execute in + * parallel since taskqueue_thread is single-threaded. 
+ */ + if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) { + tqf = (void *)sav->accel_forget_tq; + sav->accel_forget_tq = 0; + ipsec_accel_forget_sav_clear(sav); + } + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&tq->sav); + CURVNET_RESTORE(); + free(tq, M_TEMP); + free(tqf, M_TEMP); +} + +static void +ipsec_accel_sa_newkey_impl(struct secasvar *sav) +{ + struct ipsec_accel_install_newkey_tq *tq; + + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) != 0) + return; + + printf( + "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n", + be32toh(sav->spi), sav->flags, sav->seq); + + tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT); + if (tq == NULL) { + printf("ipsec_accel_sa_install_newkey: no memory for tq, " + "spi %#x\n", be32toh(sav->spi)); + /* XXXKIB */ + return; + } + + refcount_acquire(&sav->refcnt); + + TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq); + tq->sav = sav; + tq->install_vnet = curthread->td_vnet; /* XXXKIB liveness */ + taskqueue_enqueue(taskqueue_thread, &tq->install_task); +} + +static int +ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires) +{ + struct ifp_handle_sav *ihs, *i; + int error; + + MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1); + + ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO); + ihs->ifp = ifp; + ihs->sav = sav; + ihs->drv_spi = drv_spi; + ihs->ifdata = priv; + ihs->flags = flags; + if ((flags & IFP_HS_OUTPUT) != 0) + ihs->hdr_ext_size = esp_hdrsiz(sav); + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) { + error = EALREADY; + goto errout; + } + } + error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs); + if (error != 0) + goto errout; + if_ref(ihs->ifp); + CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link); + 
mtx_unlock(&ipsec_accel_sav_tmp); + if (ires != NULL) + *ires = ihs; + return (0); +errout: + mtx_unlock(&ipsec_accel_sav_tmp); + free(ihs, M_IPSEC_MISC); + if (ires != NULL) + *ires = NULL; + return (error); +} + +static void +ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav) +{ + struct ifnet *ifp; + struct secasvar *sav; + + mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED); + + CK_LIST_REMOVE(i, sav_link); + CK_LIST_REMOVE(i, sav_allh_link); + DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + ifp = i->ifp; + sav = i->sav; + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + printf("sa deinstall %s %p spi %#x ifl %#x\n", + if_name(ifp), sav, be32toh(sav->spi), i->flags); + ifp->if_ipsec_accel_m->if_sa_deinstall(ifp, + i->drv_spi, i->ifdata); + } + if_rele(ifp); + free_unr(drv_spi_unr, i->drv_spi); + free(i, M_IPSEC_MISC); + if (freesav) + key_freesav(&sav); + mtx_lock(&ipsec_accel_sav_tmp); +} + +static void +ipsec_accel_forget_sav_clear(struct secasvar *sav) +{ + struct ifp_handle_sav *i; + + for (;;) { + i = CK_LIST_FIRST(&sav->accel_ifps); + if (i == NULL) + break; + ipsec_accel_forget_handle_sav(i, false); + } +} + +static void +ipsec_accel_forget_sav_act(void *arg, int pending) +{ + struct ipsec_accel_forget_tq *tq; + struct secasvar *sav; + + tq = arg; + sav = tq->sav; + CURVNET_SET(tq->forget_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + ipsec_accel_forget_sav_clear(sav); + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&sav); + CURVNET_RESTORE(); + free(tq, M_TEMP); +} + +void +ipsec_accel_forget_sav_impl(struct secasvar *sav) +{ + struct ipsec_accel_forget_tq *tq; + + mtx_lock(&ipsec_accel_sav_tmp); + sav->accel_flags |= SADB_KEY_ACCEL_DEINST; + tq = (void *)atomic_load_ptr(&sav->accel_forget_tq); + if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq, + (uintptr_t)tq, 0)) { + mtx_unlock(&ipsec_accel_sav_tmp); + return; + } + 
mtx_unlock(&ipsec_accel_sav_tmp); + + refcount_acquire(&sav->refcnt); + TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq); + tq->forget_vnet = curthread->td_vnet; + tq->sav = sav; + taskqueue_enqueue(taskqueue_thread, &tq->forget_task); +} + +static void +ipsec_accel_on_ifdown_sav(struct ifnet *ifp) +{ + struct ifp_handle_sav *i, *marker; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker, + sav_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sav_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sav_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sav_allh_link); + if (i->ifp == ifp) { + refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? */ + ipsec_accel_forget_handle_sav(i, true); + } + } + CK_LIST_REMOVE(marker, sav_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp) +{ + struct ifp_handle_sav *i; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) + return (NULL); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp) +{ + NET_EPOCH_ASSERT(); + return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp)); +} + +static bool +ipsec_accel_is_accel_sav_impl(struct secasvar *sav) +{ + return (!CK_LIST_EMPTY(&sav->accel_ifps)); +} + +static struct secasvar * +ipsec_accel_drvspi_to_sa(u_int drv_spi) +{ + struct ifp_handle_sav *i; + + i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi); + if (i == NULL) + return (NULL); + return (i->sav); +} + +static struct ifp_handle_sp * +ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp) +{ + struct ifp_handle_sp *i; + + CK_LIST_FOREACH(i, 
&sp->accel_ifps, sp_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static bool +ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp) +{ + return (ipsec_accel_find_accel_sp(sp, ifp) != NULL); +} + +static int +ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp, + struct ifp_handle_sp **ip) +{ + struct ifp_handle_sp *i; + + i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO); + i->sp = sp; + i->ifp = ifp; + if_ref(ifp); + i->flags = IFP_HP_HANDLED; + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + *ip = i; + return (0); +} + +static bool +ipsec_accel_spdadd_match(if_t ifp, void *arg) +{ + struct secpolicy *sp; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 || + ifp->if_ipsec_accel_m->if_spdadd == NULL) + return (false); + sp = arg; + if (sp->accel_ifname != NULL && + strcmp(sp->accel_ifname, if_name(ifp)) != 0) + return (false); + if (ipsec_accel_is_accel_sp(sp, ifp)) + return (false); + return (true); +} + +static int +ipsec_accel_spdadd_cb(if_t ifp, void *arg) +{ + struct secpolicy *sp; + struct inpcb *inp; + struct ifp_handle_sp *i; + int error; + + sp = arg; + inp = sp->ipsec_accel_add_sp_inp; + printf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp); + error = ipsec_accel_remember_sp(sp, ifp, &i); + if (error != 0) { + printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n", + if_name(ifp), sp, error); + return (error); + } + error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata); + if (error != 0) { + i->flags |= IFP_HP_REJECTED; + printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n", + if_name(ifp), sp, error); + } + return (error); +} + +static void +ipsec_accel_spdadd_act(void *arg, int pending) +{ + struct secpolicy *sp; + struct inpcb *inp; + + sp = arg; + 
CURVNET_SET(sp->accel_add_tq.adddel_vnet); + if_foreach_sleep(ipsec_accel_spdadd_match, arg, + ipsec_accel_spdadd_cb, arg); + inp = sp->ipsec_accel_add_sp_inp; + if (inp != NULL) { + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); + sp->ipsec_accel_add_sp_inp = NULL; + } + CURVNET_RESTORE(); + key_freesp(&sp); +} + +void +ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + if (sp->tcount == 0 && inp == NULL) + return; + tq = &sp->accel_add_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + return; + tq->adddel_vnet = curthread->td_vnet; + sp->ipsec_accel_add_sp_inp = inp; + if (inp != NULL) + in_pcbref(inp); + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_spddel_act(void *arg, int pending) +{ + struct ifp_handle_sp *i; + struct secpolicy *sp; + int error; + + sp = arg; + CURVNET_SET(sp->accel_del_tq.adddel_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + for (;;) { + i = CK_LIST_FIRST(&sp->accel_ifps); + if (i == NULL) + break; + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(i->ifp), sp); + error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp, + sp, i->ifdata); + if (error != 0) { + printf( + "ipsec_accel_spddel: %s if_spddel %p res %d\n", + if_name(i->ifp), sp, error); + } + } + if_rele(i->ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesp(&sp); + CURVNET_RESTORE(); +} + +void +ipsec_accel_spddel_impl(struct secpolicy *sp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + + tq = &sp->accel_del_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + 
return; + tq->adddel_vnet = curthread->td_vnet; + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_on_ifdown_sp(struct ifnet *ifp) +{ + struct ifp_handle_sp *i, *marker; + struct secpolicy *sp; + int error; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker, + sp_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sp_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sp_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sp_allh_link); + if (i->ifp != ifp) + continue; + + sp = i->sp; + key_addref(sp); + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(ifp), sp); + error = ifp->if_ipsec_accel_m->if_spddel(ifp, + sp, i->ifdata); + } + if (error != 0) { + printf( + "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n", + if_name(ifp), sp, error); + } + key_freesp(&sp); + if_rele(ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + CK_LIST_REMOVE(marker, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +void +ipsec_accel_on_ifdown(struct ifnet *ifp) +{ + ipsec_accel_on_ifdown_sp(ifp); + ipsec_accel_on_ifdown_sav(ifp); +} + +static bool +ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu) +{ + int alen, blks, hlen, padding, rlen; + + rlen = m->m_pkthdr.len - skip; + hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) : + sizeof(struct newesp)) + sav->ivlen; + blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ? 
+ sav->tdb_encalgxform->native_blocksize : + sav->tdb_encalgxform->blocksize); + padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; + alen = xform_ah_authsize(sav->tdb_authalgxform); + + return (skip + hlen + rlen + padding + alen <= mtu); +} + +static bool +ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi) +{ + struct ipsec_accel_out_tag *tag; + + tag = (struct ipsec_accel_out_tag *)m_tag_get( + PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT); + if (tag == NULL) + return (false); + tag->drv_spi = drv_spi; + m_tag_prepend(m, &tag->tag); + return (true); +} + +bool +ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, + struct secpolicy *sp, struct secasvar *sav, int af, int mtu) +{ + struct ifp_handle_sav *i; + struct ip *ip; + u_long ip_len, skip; + + if (ifp == NULL) + return (false); + + M_ASSERTPKTHDR(m); + NET_EPOCH_ASSERT(); + + if (sav == NULL) + return (ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS)); + + i = ipsec_accel_is_accel_sav_ptr(sav, ifp); + if (i == NULL) + return (false); + + if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { + ip_len = m->m_pkthdr.len; + if (ip_len + i->hdr_ext_size > mtu) + return (false); + switch (af) { + case AF_INET: + ip = mtod(m, struct ip *); + skip = ip->ip_hl << 2; + break; + case AF_INET6: + skip = sizeof(struct ip6_hdr); + break; + default: + __unreachable(); + } + if (!ipsec_accel_output_pad(m, sav, skip, mtu)) + return (false); + } + + if (!ipsec_accel_output_tag(m, i->drv_spi)) + return (false); + + ipsec_accel_sa_recordxfer(sav, m); + key_freesav(&sav); + if (sp != NULL) + key_freesp(&sp); + + return (true); +} + +struct ipsec_accel_in_tag * +ipsec_accel_input_tag_lookup(const struct mbuf *m) +{ + struct ipsec_accel_in_tag *tag; + struct m_tag *xtag; + + xtag = m_tag_find(__DECONST(struct mbuf *, m), + PACKET_TAG_IPSEC_ACCEL_IN, NULL); + if (xtag == NULL) + return (NULL); + tag = __containerof(xtag, struct ipsec_accel_in_tag, tag); + return (tag); +} + +int 
+ipsec_accel_input(struct mbuf *m, int offset, int proto) +{ + struct secasvar *sav; + struct ipsec_accel_in_tag *tag; + + tag = ipsec_accel_input_tag_lookup(m); + if (tag == NULL) + return (ENXIO); + + if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN || + tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) { + printf("if %s mbuf %p drv_spi %d invalid, packet dropped\n", + (m->m_flags & M_PKTHDR) != 0 ? if_name(m->m_pkthdr.rcvif) : + "", m, tag->drv_spi); + m_freem(m); + return (EINPROGRESS); + } + + sav = ipsec_accel_drvspi_to_sa(tag->drv_spi); + if (sav != NULL) + ipsec_accel_sa_recordxfer(sav, m); + return (0); +} + +static void +ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m) +{ + counter_u64_add(sav->accel_lft_sw, 1); + counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len); + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; +} + +static void +ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c, + const struct seclifetime *lft_l) +{ + lft_c->allocations += lft_l->allocations; + lft_c->bytes += lft_l->bytes; + lft_c->usetime = min(lft_c->usetime, lft_l->usetime); +} + +void +ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t octets, uint64_t allocs) +{ + struct epoch_tracker et; + struct ifp_handle_sav *i; + uint64_t odiff, adiff; + + NET_EPOCH_ENTER(et); + mtx_lock(&ipsec_accel_cnt_lock); + + if (allocs != 0) { + if (sav->firstused == 0) + sav->firstused = time_second; + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; + } + + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp && i->drv_spi == drv_spi) + break; + } + if (i == NULL) + goto out; + + odiff = octets - i->cnt_octets; + adiff = allocs - i->cnt_allocs; + + if (sav->lft_c != NULL) { + counter_u64_add(sav->lft_c_bytes, odiff); + counter_u64_add(sav->lft_c_allocations, adiff); + } + + i->cnt_octets = octets; + i->cnt_allocs = allocs; + sav->accel_hw_octets += odiff; + sav->accel_hw_allocs += adiff; + 
+out: + mtx_unlock(&ipsec_accel_cnt_lock); + NET_EPOCH_EXIT(et); +} + +static void +ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp, + struct seclifetime *lft) +{ + struct ifp_handle_sav *i; + if_sa_cnt_fn_t p; + + IFNET_RLOCK_ASSERT(); + + i = ipsec_accel_is_accel_sav_ptr(sav, ifp); + if (i != NULL && (i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + p = ifp->if_ipsec_accel_m->if_sa_cnt; + if (p != NULL) + p(ifp, sav, i->drv_spi, i->ifdata, lft); + } +} + +static int +ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + struct seclifetime lft_l, lft_s; + struct ifp_handle_sav *i; + if_t ifp1; + if_sa_cnt_fn_t p; + int error; + + error = 0; + memset(&lft_l, 0, sizeof(lft_l)); + memset(&lft_s, 0, sizeof(lft_s)); + + switch (op & ~IF_SA_CNT_UPD) { + case IF_SA_CNT_IFP_HW_VAL: + ipsec_accel_sa_lifetime_hw(sav, ifp, &lft_l); + ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + break; + + case IF_SA_CNT_TOTAL_SW_VAL: + lft_l.allocations = (uint32_t)counter_u64_fetch( + sav->accel_lft_sw); + lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1); + lft_l.usetime = sav->accel_firstused; + break; + + case IF_SA_CNT_TOTAL_HW_VAL: + IFNET_RLOCK_ASSERT(); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) != + IFP_HS_HANDLED) + continue; + ifp1 = i->ifp; + p = ifp1->if_ipsec_accel_m->if_sa_cnt; + if (p == NULL) + continue; + memset(&lft_s, 0, sizeof(lft_s)); + if (sahtree_trackerp != NULL) + ipsec_sahtree_runlock(sahtree_trackerp); + error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s); + if (sahtree_trackerp != NULL) + ipsec_sahtree_rlock(sahtree_trackerp); + if (error == 0) + ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + } + break; + } + + if (error == 0) { + if ((op & IF_SA_CNT_UPD) == 0) + memset(lft_c, 0, sizeof(*lft_c)); + ipsec_accel_sa_lifetime_update(lft_c, &lft_l); + 
} + + return (error); +} + +static void +ipsec_accel_sync_imp(void) +{ + taskqueue_drain_all(taskqueue_thread); +} + +static struct mbuf * +ipsec_accel_key_setaccelif_impl(struct secasvar *sav) +{ + struct mbuf *m, *m1; + struct ifp_handle_sav *i; + struct epoch_tracker et; + + if (sav->accel_ifname != NULL) + return (key_setaccelif(sav->accel_ifname)); + + m = m1 = NULL; + + NET_EPOCH_ENTER(et); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + m1 = key_setaccelif(if_name(i->ifp)); + if (m == NULL) + m = m1; + else if (m1 != NULL) + m_cat(m, m1); + } + } + NET_EPOCH_EXIT(et); + return (m); +} + +#endif /* IPSEC_OFFLOAD */ diff --git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h new file mode 100644 index 000000000000..87e2a33288be --- /dev/null +++ b/sys/netipsec/ipsec_offload.h @@ -0,0 +1,191 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _NETIPSEC_IPSEC_OFFLOAD_H_ +#define _NETIPSEC_IPSEC_OFFLOAD_H_ + +#ifdef _KERNEL +#include +#include +#include + +struct secpolicy; +struct secasvar; +struct inpcb; + +struct ipsec_accel_out_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +struct ipsec_accel_in_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +#define IPSEC_ACCEL_DRV_SPI_BYPASS 2 +#define IPSEC_ACCEL_DRV_SPI_MIN 3 +#define IPSEC_ACCEL_DRV_SPI_MAX 0xffff + +extern void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); +extern void (*ipsec_accel_sa_install_input_p)(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi); +extern void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +extern void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +extern void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +extern int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +extern void (*ipsec_accel_sync_p)(void); +extern bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +extern struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); + +#ifdef IPSEC_OFFLOAD +/* + * Have to use ipsec_accel_sa_install_input_p indirection because + * key.c is unconditionally included into the static kernel. 
+ */ +static inline void +ipsec_accel_sa_newkey(struct secasvar *sav) +{ + void (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_sa_newkey_p); + if (p != NULL) + p(sav); +} + +static inline void +ipsec_accel_forget_sav(struct secasvar *sav) +{ + void (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_forget_sav_p); + if (p != NULL) + p(sav); +} + +static inline void +ipsec_accel_spdadd(struct secpolicy *sp, struct inpcb *inp) +{ + void (*p)(struct secpolicy *sp, struct inpcb *inp); + + p = atomic_load_ptr(&ipsec_accel_spdadd_p); + if (p != NULL) + p(sp, inp); +} + +static inline void +ipsec_accel_spddel(struct secpolicy *sp) +{ + void (*p)(struct secpolicy *sp); + + p = atomic_load_ptr(&ipsec_accel_spddel_p); + if (p != NULL) + p(sp); +} + +static inline int +ipsec_accel_sa_lifetime_op(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + int (*p)(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, + enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp); + + p = atomic_load_ptr(&ipsec_accel_sa_lifetime_op_p); + if (p != NULL) + return (p(sav, lft_c, ifp, op, sahtree_trackerp)); + return (ENOTSUP); +} + +static inline void +ipsec_accel_sync(void) +{ + void (*p)(void); + + p = atomic_load_ptr(&ipsec_accel_sync_p); + if (p != NULL) + p(); +} + +static inline bool +ipsec_accel_is_accel_sav(struct secasvar *sav) +{ + bool (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_is_accel_sav_p); + if (p != NULL) + return (p(sav)); + return (false); +} + +static inline struct mbuf * +ipsec_accel_key_setaccelif(struct secasvar *sav) +{ + struct mbuf *(*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_key_setaccelif_p); + if (p != NULL) + return (p(sav)); + return (NULL); +} + + +#else +#define ipsec_accel_sa_newkey(a) +#define ipsec_accel_forget_sav(a) +#define ipsec_accel_spdadd(a, b) +#define ipsec_accel_spddel(a) +#define 
ipsec_accel_sa_lifetime_op(a, b, c, d, e) +#define ipsec_accel_sync() +#define ipsec_accel_is_accel_sav(a) +#define ipsec_accel_key_setaccelif(a) +#endif + +void ipsec_accel_forget_sav_impl(struct secasvar *sav); +void ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp); +void ipsec_accel_spddel_impl(struct secpolicy *sp); + +#ifdef IPSEC_OFFLOAD +int ipsec_accel_input(struct mbuf *m, int offset, int proto); +bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, + struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, + int mtu); +void ipsec_accel_forget_sav(struct secasvar *sav); +#else +#define ipsec_accel_input(a, b, c) (ENXIO) +#define ipsec_accel_output(a, b, c, d, e, f, g) (false) +#define ipsec_accel_forget_sav(a) +#endif + +struct ipsec_accel_in_tag *ipsec_accel_input_tag_lookup(const struct mbuf *); +void ipsec_accel_on_ifdown(struct ifnet *ifp); +void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t octets, uint64_t allocs); + +#endif /* _KERNEL */ + +#endif /* _NETIPSEC_IPSEC_OFFLOAD_H_ */ diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index be996f257b64..8f49bc8fce24 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -84,6 +84,7 @@ #include #endif #include +#include #include #include #include @@ -210,6 +211,8 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET, mtu); key_freesp(&sp); return (error); } @@ -222,6 +225,9 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu)) + return (EJUSTRETURN); + ip = mtod(m, struct ip *); dst = &sav->sah->saidx.dst; /* Do 
the appropriate encapsulation, if necessary */ @@ -597,6 +603,8 @@ ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET6, mtu); key_freesp(&sp); return (error); } @@ -611,6 +619,9 @@ ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu)) + return (EJUSTRETURN); + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ dst = &sav->sah->saidx.dst; @@ -859,6 +870,10 @@ ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, struct m_tag *mtag; int error; + if (sav->state >= SADB_SASTATE_DEAD) { + error = ESRCH; + goto bad; + } saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET diff --git a/sys/netipsec/ipsec_pcb.c b/sys/netipsec/ipsec_pcb.c index 38a94907cc48..497bc5e3b2f3 100644 --- a/sys/netipsec/ipsec_pcb.c +++ b/sys/netipsec/ipsec_pcb.c @@ -49,6 +49,7 @@ #include #include #include +#include MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); @@ -166,18 +167,26 @@ ipsec_init_pcbpolicy(struct inpcb *inp) int ipsec_delete_pcbpolicy(struct inpcb *inp) { + struct inpcbpolicy *inp_sp; - if (inp->inp_sp == NULL) + inp_sp = inp->inp_sp; + if (inp_sp == NULL) return (0); - - if (inp->inp_sp->sp_in != NULL) - key_freesp(&inp->inp_sp->sp_in); - - if (inp->inp_sp->sp_out != NULL) - key_freesp(&inp->inp_sp->sp_out); - - free(inp->inp_sp, M_IPSEC_INPCB); inp->inp_sp = NULL; + + if (inp_sp->sp_in != NULL) { + if ((inp_sp->flags & INP_INBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_in); + key_freesp(&inp_sp->sp_in); + } + + if (inp_sp->sp_out != NULL) { + if ((inp_sp->flags & INP_OUTBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_out); 
+ key_freesp(&inp_sp->sp_out); + } + + free(inp_sp, M_IPSEC_INPCB); return (0); } @@ -248,20 +257,26 @@ ipsec_copy_pcbpolicy(struct inpcb *old, struct inpcb *new) if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); - if (new->inp_sp->sp_in != NULL) + if (new->inp_sp->sp_in != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_in); key_freesp(&new->inp_sp->sp_in); + } new->inp_sp->sp_in = sp; new->inp_sp->flags |= INP_INBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); - if (new->inp_sp->sp_out != NULL) + if (new->inp_sp->sp_out != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_out); key_freesp(&new->inp_sp->sp_out); + } new->inp_sp->sp_out = sp; new->inp_sp->flags |= INP_OUTBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } return (0); } @@ -339,8 +354,10 @@ ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, flags = INP_OUTBOUND_POLICY; } /* Clear old SP and set new SP. 
*/ - if (*spp != NULL) + if (*spp != NULL) { + ipsec_accel_spddel(*spp); key_freesp(spp); + } *spp = newsp; KEYDBG(IPSEC_DUMP, printf("%s: new SP(%p)\n", __func__, newsp)); @@ -348,6 +365,7 @@ ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, inp->inp_sp->flags &= ~flags; else { inp->inp_sp->flags |= flags; + ipsec_accel_spdadd(newsp, inp); KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); } INP_WUNLOCK(inp); diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index 501f5c0a7339..38dd2bc5c1a6 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #ifdef INET6 @@ -90,12 +91,26 @@ #endif #include +#include #include #include /* randomness */ #include +#ifdef IPSEC_OFFLOAD +void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); +void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +void (*ipsec_accel_sync_p)(void); +bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); +#endif + #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) @@ -391,6 +406,9 @@ static const int minsize[] = { [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = sizeof(struct sadb_address), [SADB_X_EXT_NEW_ADDRESS_DST] = sizeof(struct sadb_address), + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(minsize) == SADB_EXT_MAX + 1, "minsize size mismatch"); @@ -424,6 +442,9 @@ static const int maxsize[] = { [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), 
[SADB_X_EXT_NEW_ADDRESS_SRC] = 0, [SADB_X_EXT_NEW_ADDRESS_DST] = 0, + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(maxsize) == SADB_EXT_MAX + 1, "maxsize size mismatch"); @@ -661,7 +682,7 @@ static int key_updateaddresses(struct socket *, struct mbuf *, const struct sadb_msghdr *, struct secasvar *, struct secasindex *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t); + u_int8_t, u_int32_t, u_int32_t, struct rm_priotracker *); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); @@ -1227,6 +1248,11 @@ key_freesp(struct secpolicy **spp) KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); *spp = NULL; +#ifdef IPSEC_OFFLOAD + KASSERT(CK_LIST_EMPTY(&sp->accel_ifps), + ("key_freesp: sp %p still offloaded", sp)); + free(__DECONST(char *, sp->accel_ifname), M_IPSEC_MISC); +#endif while (sp->tcount > 0) ipsec_delisr(sp->req[--sp->tcount]); free(sp, M_IPSEC_SP); @@ -1240,6 +1266,7 @@ key_unlink(struct secpolicy *sp) SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); key_freesp(&sp); } @@ -1258,6 +1285,7 @@ key_detach(struct secpolicy *sp) return; } sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); V_spd_size--; LIST_REMOVE(sp, idhash); @@ -1285,6 +1313,7 @@ done: newsp->state = IPSEC_SPSTATE_ALIVE; V_spd_size++; V_sp_genid++; + ipsec_accel_spdadd(newsp, NULL); } /* @@ -1329,6 +1358,7 @@ key_register_ifnet(struct secpolicy **spp, u_int count) */ LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); spp[i]->state = IPSEC_SPSTATE_IFNET; + ipsec_accel_spdadd(spp[i], NULL); } SPTREE_WUNLOCK(); /* @@ -1357,6 +1387,7 @@ key_unregister_ifnet(struct secpolicy **spp, u_int count) if 
(spp[i]->state != IPSEC_SPSTATE_IFNET) continue; spp[i]->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(spp[i]); TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], spp[i], chain); V_spd_size--; @@ -1365,6 +1396,7 @@ key_unregister_ifnet(struct secpolicy **spp, u_int count) SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); for (i = 0; i < count; i++) { m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); @@ -1424,6 +1456,7 @@ key_unlinksav(struct secasvar *sav) /* Unlink from SPI hash */ LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sah = sav->sah; SAHTREE_WUNLOCK(); key_freesav(&sav); @@ -1821,6 +1854,9 @@ key_sp2msg(struct secpolicy *sp, void *request, size_t *len) size_t xlen, ilen; caddr_t p; int error, i; +#ifdef IPSEC_OFFLOAD + struct sadb_x_if_hw_offl *xif; +#endif IPSEC_ASSERT(sp != NULL, ("null policy")); @@ -1876,6 +1912,18 @@ key_sp2msg(struct secpolicy *sp, void *request, size_t *len) } } xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); +#ifdef IPSEC_OFFLOAD + if (error == 0 && sp->accel_ifname != NULL) { + xif = (struct sadb_x_if_hw_offl *)(xpl + 1); + bzero(xif, sizeof(*xif)); + xif->sadb_x_if_hw_offl_len = PFKEY_UNIT64(sizeof(*xif)); + xif->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + xif->sadb_x_if_hw_offl_flags = 0; + strncpy(xif->sadb_x_if_hw_offl_if, sp->accel_ifname, + sizeof(xif->sadb_x_if_hw_offl_if)); + xlen += sizeof(*xif); + } +#endif if (error == 0) *len = xlen; else @@ -2088,6 +2136,27 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? 
lft->sadb_lifetime_usetime : 0; bcopy(&spidx, &newsp->spidx, sizeof(spidx)); +#ifdef IPSEC_OFFLOAD + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + newsp->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsp->accel_ifname == NULL) { + ipseclog((LOG_DEBUG, "%s: cannot alloc accel_ifname.\n", + __func__)); + key_freesp(&newsp); + return (key_senderror(so, m, error)); + } + strncpy(__DECONST(char *, newsp->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } + +#endif SPTREE_WLOCK(); if ((newsp->id = key_getnewspid()) == 0) { @@ -2095,6 +2164,7 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) key_detach(oldsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2109,6 +2179,7 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) key_insertsp(newsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2290,6 +2361,7 @@ key_spddelete(struct socket *so, struct mbuf *m, KEYDBG(KEY_STAMP, printf("%s: SP(%p)\n", __func__, sp)); KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + ipsec_accel_spddel(sp); key_unlink(sp); key_freesp(&sp); @@ -2561,6 +2633,7 @@ key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) */ TAILQ_FOREACH(sp, &drainq, chain) { sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); LIST_REMOVE(sp, idhash); } V_sp_genid++; @@ -2764,6 +2837,10 @@ key_getspreqmsglen(struct secpolicy *sp) tlen += PFKEY_ALIGN8(len); } 
+#ifdef IPSEC_OFFLOAD + if (sp->accel_ifname != NULL) + tlen += sizeof(struct sadb_x_if_hw_offl); +#endif return (tlen); } @@ -3005,6 +3082,32 @@ key_newsav(const struct sadb_msghdr *mhp, struct secasindex *saidx, sav->state = SADB_SASTATE_LARVAL; sav->pid = (pid_t)mhp->msg->sadb_msg_pid; SAV_INITREF(sav); +#ifdef IPSEC_OFFLOAD + CK_LIST_INIT(&sav->accel_ifps); + sav->accel_forget_tq = 0; + sav->accel_lft_sw = uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (sav->accel_lft_sw == NULL) { + *errp = ENOBUFS; + goto done; + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + sav->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (sav->accel_ifname == NULL) { + *errp = ENOBUFS; + goto done; + } + strncpy(__DECONST(char *, sav->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } +#endif again: sah = key_getsah(saidx); if (sah == NULL) { @@ -3068,9 +3171,10 @@ again: SAH_ADDREF(sah); } /* Link SAV with SAH */ - if (sav->state == SADB_SASTATE_MATURE) + if (sav->state == SADB_SASTATE_MATURE) { TAILQ_INSERT_HEAD(&sah->savtree_alive, sav, chain); - else + ipsec_accel_sa_newkey(sav); + } else TAILQ_INSERT_HEAD(&sah->savtree_larval, sav, chain); /* Add SAV into SPI hash */ LIST_INSERT_HEAD(SAVHASH_HASH(sav->spi), sav, spihash); @@ -3085,6 +3189,13 @@ done: } if (sav->lft_c != NULL) uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); +#ifdef IPSEC_OFFLOAD + if (sav->accel_lft_sw != NULL) + uma_zfree_pcpu(ipsec_key_lft_zone, + sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), + M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA), sav = NULL; } if (sah != NULL) @@ -3153,6 +3264,10 @@ key_delsav(struct secasvar *sav) ("attempt to free non DEAD SA %p", sav)); IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); 
+#ifdef IPSEC_OFFLOAD + KASSERT(CK_LIST_EMPTY(&sav->accel_ifps), + ("key_unlinksav: sav %p still offloaded", sav)); +#endif /* * SA must be unlinked from the chain and hashtbl. @@ -3165,6 +3280,11 @@ key_delsav(struct secasvar *sav) free(sav->lock, M_IPSEC_MISC); uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); } +#ifdef IPSEC_OFFLOAD + /* XXXKIB should this be moved to key_cleansav()? */ + uma_zfree_pcpu(ipsec_key_lft_zone, sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA); } @@ -3588,7 +3708,7 @@ fail: */ static struct mbuf * key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, - uint32_t seq, uint32_t pid) + uint32_t seq, uint32_t pid, struct rm_priotracker *sahtree_trackerp) { struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; @@ -3604,8 +3724,15 @@ key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, +#ifdef IPSEC_OFFLOAD + SADB_X_EXT_LFT_CUR_SW_OFFL, SADB_X_EXT_LFT_CUR_HW_OFFL, + SADB_X_EXT_IF_HW_OFFL, +#endif }; uint32_t replay_count; +#ifdef IPSEC_OFFLOAD + int error; +#endif SECASVAR_RLOCK_TRACKER; @@ -3752,6 +3879,44 @@ key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. 
*/ continue; +#ifdef IPSEC_OFFLOAD + case SADB_X_EXT_LFT_CUR_SW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + SAV_ADDREF(sav); + error = ipsec_accel_sa_lifetime_op(sav, &lft_c, + NULL, IF_SA_CNT_TOTAL_SW_VAL, sahtree_trackerp); + if (error != 0) { + m = NULL; + goto fail; + } + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + key_freesav(&sav); + if (sav == NULL) { + m_freem(m); + goto fail; + } + break; + case SADB_X_EXT_LFT_CUR_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + memset(&lft_c, 0, sizeof(lft_c)); + lft_c.bytes = sav->accel_hw_octets; + lft_c.allocations = sav->accel_hw_allocs; + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + break; + case SADB_X_EXT_IF_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + m = ipsec_accel_key_setaccelif(sav); + if (m == NULL) + continue; /* benign */ + break; +#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: @@ -4502,6 +4667,7 @@ key_flush_spd(time_t now) V_spd_size--; LIST_REMOVE(sp, idhash); sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); sp = nextsp; } V_sp_genid++; @@ -4625,6 +4791,7 @@ key_flush_sad(time_t now) TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Unlink all SAs with expired HARD lifetime */ @@ -4641,6 +4808,7 @@ key_flush_sad(time_t now) TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Mark all SAs with expired SOFT lifetime as DYING */ @@ -5239,6 +5407,30 @@ key_updateaddresses(struct socket *so, struct mbuf *m, /* Clone SA's content into newsav */ SAV_INITREF(newsav); bcopy(sav, newsav, offsetof(struct secasvar, chain)); +#ifdef IPSEC_OFFLOAD + CK_LIST_INIT(&newsav->accel_ifps); + newsav->accel_forget_tq = 0; + newsav->accel_lft_sw =
uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (newsav->accel_lft_sw == NULL) { + error = ENOBUFS; + goto fail; + } + if (sav->accel_ifname != NULL) { + struct sadb_x_if_hw_offl xof; + + newsav->accel_ifname = malloc(sizeof(xof.sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsav->accel_ifname == NULL) { + error = ENOBUFS; + goto fail; + } + strncpy(__DECONST(char *, newsav->accel_ifname), + sav->accel_ifname, + sizeof(xof.sadb_x_if_hw_offl_if)); + } +#endif + /* * We create new NAT-T config if it is needed. * Old NAT-T config will be freed by key_cleansav() when @@ -5269,6 +5461,7 @@ key_updateaddresses(struct socket *so, struct mbuf *m, TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); /* * Link new SA with SAH. Keep SAs ordered by @@ -5326,6 +5519,10 @@ fail: if (isnew != 0) key_freesah(&sah); if (newsav != NULL) { +#ifdef IPSEC_OFFLOAD + uma_zfree_pcpu(ipsec_key_lft_zone, newsav->accel_lft_sw); + free(__DECONST(char *, newsav->accel_ifname), M_IPSEC_MISC); +#endif if (newsav->natt != NULL) free(newsav->natt, M_IPSEC_MISC); free(newsav, M_IPSEC_SA); @@ -5540,6 +5737,7 @@ key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) KEYDBG(KEY_STAMP, printf("%s: SA(%p)\n", __func__, sav)); KEYDBG(KEY_DATA, kdebug_secasv(sav)); + ipsec_accel_sa_newkey(sav); key_freesav(&sav); { @@ -5692,6 +5890,7 @@ key_add(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) KEYDBG(KEY_STAMP, printf("%s: return SA(%p)\n", __func__, sav)); KEYDBG(KEY_DATA, kdebug_secasv(sav)); + ipsec_accel_sa_newkey(sav); /* * If SADB_ADD was in response to SADB_ACQUIRE, we need to schedule * ACQ for deletion.
@@ -6196,6 +6395,7 @@ key_delete_all(struct socket *so, struct mbuf *m, /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6264,6 +6464,7 @@ key_delete_xform(const struct xformsw *xsp) /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6372,7 +6573,7 @@ key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, - mhp->msg->sadb_msg_pid); + mhp->msg->sadb_msg_pid, NULL); key_freesav(&sav); if (!n) @@ -7614,9 +7815,11 @@ key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) */ TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -7638,10 +7841,12 @@ key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } /* Add SAH into flushq */ TAILQ_INSERT_HEAD(&flushq, sah, chain); @@ -7705,6 +7910,7 @@ key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* count sav entries to be sent to the userland. 
*/ cnt = 0; + IFNET_RLOCK(); SAHTREE_RLOCK(); TAILQ_FOREACH(sah, &V_sahtree, chain) { if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && @@ -7719,6 +7925,7 @@ key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) if (cnt == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOENT); } @@ -7731,30 +7938,34 @@ key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); m_freem(m); return (0); } @@ -8175,6 +8386,11 @@ key_align(struct mbuf *m, struct sadb_msghdr *mhp) case SADB_X_EXT_SA_REPLAY: case SADB_X_EXT_NEW_ADDRESS_SRC: case SADB_X_EXT_NEW_ADDRESS_DST: +#ifdef IPSEC_OFFLOAD + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + case SADB_X_EXT_IF_HW_OFFL: +#endif /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -8483,9 +8699,11 @@ key_vnet_destroy(void *arg __unused) sah->state = SADB_SASTATE_DEAD; TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state 
= SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -8633,6 +8851,32 @@ key_setkey(struct seckey *src, uint16_t exttype) return m; } +#ifdef IPSEC_OFFLOAD +struct mbuf * +key_setaccelif(const char *ifname) +{ + struct mbuf *m = NULL; + struct sadb_x_if_hw_offl *p; + int len = PFKEY_ALIGN8(sizeof(*p)); + + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (m); + m_align(m, len); + m->m_len = len; + p = mtod(m, struct sadb_x_if_hw_offl *); + + bzero(p, len); + p->sadb_x_if_hw_offl_len = PFKEY_UNIT64(len); + p->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + p->sadb_x_if_hw_offl_flags = 0; + strncpy(p->sadb_x_if_hw_offl_if, ifname, + sizeof(p->sadb_x_if_hw_offl_if)); + + return (m); +} +#endif + /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to @@ -8708,3 +8952,15 @@ comp_algorithm_lookup(int alg) return (supported_calgs[i].xform); return (NULL); } + +void +ipsec_sahtree_runlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_runlock(&sahtree_lock, sahtree_trackerp); +} + +void +ipsec_sahtree_rlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_rlock(&sahtree_lock, sahtree_trackerp); +} diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h index d62426e6733e..ca0c9036800a 100644 --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -36,6 +36,7 @@ #ifdef _KERNEL +struct mbuf; struct secpolicy; struct secpolicyindex; struct secasvar; @@ -60,6 +61,7 @@ int key_havesp_any(void); void key_bumpspgen(void); uint32_t key_getspgen(void); uint32_t key_newreqid(void); +struct mbuf *key_setaccelif(const char *ifname); struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); struct secasvar *key_allocsa_tunnel(union sockaddr_union *, @@ -85,6 +87,10 @@ extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); uint16_t key_portfromsaddr(struct sockaddr *); void key_porttosaddr(struct sockaddr *, uint16_t port); +struct 
rm_priotracker; +void ipsec_sahtree_runlock(struct rm_priotracker *); +void ipsec_sahtree_rlock(struct rm_priotracker *); + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); MALLOC_DECLARE(M_IPSEC_SAH); diff --git a/sys/netipsec/key_debug.c b/sys/netipsec/key_debug.c index dcb542b22ad8..ead5fe80115b 100644 --- a/sys/netipsec/key_debug.c +++ b/sys/netipsec/key_debug.c @@ -155,6 +155,8 @@ kdebug_sadb_exttype(uint16_t type) X_NAME(SA_REPLAY); X_NAME(NEW_ADDRESS_SRC); X_NAME(NEW_ADDRESS_DST); + X_NAME(LFT_CUR_SW_OFFL); + X_NAME(LFT_CUR_HW_OFFL); default: return ("UNKNOWN"); }; @@ -251,6 +253,10 @@ kdebug_sadb(struct sadb_msg *base) case SADB_X_EXT_NAT_T_DPORT: kdebug_sadb_x_natt(ext); break; + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + kdebug_sadb_lifetime(ext); + break; default: printf("%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type); diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h index 041a5ce1293c..ccc4a68e78fb 100644 --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -36,9 +36,11 @@ #ifdef _KERNEL #include +#include #include #include #include +#include #include #include @@ -125,6 +127,7 @@ struct xformsw; struct enc_xform; struct auth_hash; struct comp_algo; +struct ifp_handle_sav; /* * Security Association @@ -185,8 +188,19 @@ struct secasvar { uint64_t cntr; /* counter for GCM and CTR */ volatile u_int refcnt; /* reference count */ + CK_LIST_HEAD(, ifp_handle_sav) accel_ifps; + uintptr_t accel_forget_tq; + const char *accel_ifname; + uint32_t accel_flags; + counter_u64_t accel_lft_sw; + uint64_t accel_hw_allocs; + uint64_t accel_hw_octets; + uint64_t accel_firstused; }; +#define SADB_KEY_ACCEL_INST 0x00000001 +#define SADB_KEY_ACCEL_DEINST 0x00000002 + #define SECASVAR_RLOCK_TRACKER struct rm_priotracker _secas_tracker #define SECASVAR_RLOCK(_sav) rm_rlock((_sav)->lock, &_secas_tracker) #define SECASVAR_RUNLOCK(_sav) rm_runlock((_sav)->lock, &_secas_tracker) From 240b7bfe56f25b20ee248e4fb4a3232c5f13390d Mon Sep 17
00:00:00 2001 From: Konstantin Belousov Date: Wed, 14 Feb 2024 14:52:56 +0200 Subject: [PATCH 17/25] ipsec_offload: offload inner checksums calculations for UDP/TCP/TSO and allow the interface driver to declare such support. Sponsored by: NVIDIA networking Differential revision: https://reviews.freebsd.org/D44221 --- sys/net/if_var.h | 3 ++ sys/netipsec/ipsec_offload.c | 5 +- sys/netipsec/ipsec_offload.h | 7 ++- sys/netipsec/ipsec_output.c | 90 +++++++++++++++++++----------------- 4 files changed, 60 insertions(+), 45 deletions(-) diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 3e094dcb3cd5..579585b25dd2 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -148,6 +148,8 @@ enum IF_SA_CNT_WHICH { }; typedef int (*if_sa_cnt_fn_t)(if_t ifp, void *sa, uint32_t drv_spi, void *priv, struct seclifetime *lt); +typedef int (*if_ipsec_hwassist_fn_t)(if_t ifp, void *sav, + u_int drv_spi,void *priv); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ @@ -727,6 +729,7 @@ struct if_ipsec_accel_methods { if_sa_newkey_fn_t if_sa_newkey; if_sa_deinstall_fn_t if_sa_deinstall; if_sa_cnt_fn_t if_sa_cnt; + if_ipsec_hwassist_fn_t if_hwassist; }; void if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *); diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c index 851bacaf4ea1..7f63a5e0ccb6 100644 --- a/sys/netipsec/ipsec_offload.c +++ b/sys/netipsec/ipsec_offload.c @@ -799,12 +799,13 @@ ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi) bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, - struct secpolicy *sp, struct secasvar *sav, int af, int mtu) + struct secpolicy *sp, struct secasvar *sav, int af, int mtu, int *hwassist) { struct ifp_handle_sav *i; struct ip *ip; u_long ip_len, skip; + *hwassist = 0; if (ifp == NULL) return (false); @@ -845,6 +846,8 @@ ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, if (sp != NULL) key_freesp(&sp); + *hwassist 
= ifp->if_ipsec_accel_m->if_hwassist(ifp, sav, + i->drv_spi, i->ifdata); return (true); } diff --git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h index 87e2a33288be..27b9c938832e 100644 --- a/sys/netipsec/ipsec_offload.h +++ b/sys/netipsec/ipsec_offload.h @@ -173,11 +173,14 @@ void ipsec_accel_spddel_impl(struct secpolicy *sp); int ipsec_accel_input(struct mbuf *m, int offset, int proto); bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, - int mtu); + int mtu, int *hwassist); void ipsec_accel_forget_sav(struct secasvar *sav); #else #define ipsec_accel_input(a, b, c) (ENXIO) -#define ipsec_accel_output(a, b, c, d, e, f, g) (false) +#define ipsec_accel_output(a, b, c, d, e, f, g, h) ({ \ + *h = 0; \ + false; \ +}) #define ipsec_accel_forget_sav(a) #endif diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 8f49bc8fce24..10f1728f72ac 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -195,7 +195,8 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, union sockaddr_union *dst; struct secasvar *sav; struct ip *ip; - int error, i, off; + int error, hwassist, i, off; + bool accel; IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); @@ -212,7 +213,7 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ (void)ipsec_accel_output(ifp, m, inp, sp, NULL, - AF_INET, mtu); + AF_INET, mtu, &hwassist); key_freesp(&sp); return (error); } @@ -225,7 +226,28 @@ ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; - if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu)) + hwassist = 0; + accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu, + &hwassist); + + /* + * Do delayed 
checksums now because we send before + * this is done in the normal processing path. + */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~hwassist) != 0) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if ((m->m_pkthdr.csum_flags & CSUM_SCTP & ~hwassist) != 0) { + struct ip *ip; + + ip = mtod(m, struct ip *); + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + } +#endif + if (accel) return (EJUSTRETURN); ip = mtod(m, struct ip *); @@ -401,25 +423,7 @@ ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, * packets, and thus, even if they are forwarded, the replies will * return back to us. */ - if (!forwarding) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#if defined(SCTP) || defined(SCTP_SUPPORT) - if (m->m_pkthdr.csum_flags & CSUM_SCTP) { - struct ip *ip; - ip = mtod(m, struct ip *); - sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); - m->m_pkthdr.csum_flags &= ~CSUM_SCTP; - } -#endif - } /* NB: callee frees mbuf and releases reference to SP */ error = ipsec4_check_pmtu(ifp, m, sp, forwarding); if (error != 0) { @@ -596,7 +600,8 @@ ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, union sockaddr_union *dst; struct secasvar *sav; struct ip6_hdr *ip6; - int error, i, off; + int error, hwassist, i, off; + bool accel; IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); @@ -604,7 +609,7 @@ ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ (void)ipsec_accel_output(ifp, m, inp, sp, NULL, - AF_INET6, mtu); + AF_INET6, mtu, &hwassist); key_freesp(&sp); return (error); } @@ -619,7 +624,26 @@ 
ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; - if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu)) + hwassist = 0; + accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu, + &hwassist); + + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~hwassist) != 0) { + in6_delayed_cksum(m, m->m_pkthdr.len - + sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if ((m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6 & ~hwassist) != 0) { + sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + } +#endif + if (accel) return (EJUSTRETURN); ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ @@ -778,24 +802,6 @@ ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, return (0); /* No IPsec required. */ } - if (!forwarding) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. 
- */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { - in6_delayed_cksum(m, m->m_pkthdr.len - - sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; - } -#if defined(SCTP) || defined(SCTP_SUPPORT) - if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { - sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); - m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; - } -#endif - } - error = ipsec6_check_pmtu(ifp, m, sp, forwarding); if (error != 0) { if (error == EJUSTRETURN) From e6e2c0a5ef5d0454b19d2f89357b431eaeb1dd76 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 6 Mar 2024 17:53:10 +0200 Subject: [PATCH 18/25] ipsec_offload: switch TF2_IPSEC_TSO on/off as appropriate on output after the interface ipsec_accel method if_hwassist() is consulted. Sponsored by: NVIDIA networking --- sys/netipsec/ipsec_offload.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c index 7f63a5e0ccb6..48082830b88b 100644 --- a/sys/netipsec/ipsec_offload.c +++ b/sys/netipsec/ipsec_offload.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -803,26 +804,31 @@ ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, { struct ifp_handle_sav *i; struct ip *ip; + struct tcpcb *tp; u_long ip_len, skip; + bool res; *hwassist = 0; + res = false; if (ifp == NULL) - return (false); + return (res); M_ASSERTPKTHDR(m); NET_EPOCH_ASSERT(); - if (sav == NULL) - return (ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS)); + if (sav == NULL) { + res = ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS); + goto out; + } i = ipsec_accel_is_accel_sav_ptr(sav, ifp); if (i == NULL) - return (false); + goto out; if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { ip_len = m->m_pkthdr.len; if (ip_len + i->hdr_ext_size > mtu) - return (false); + goto out; switch (af) { case AF_INET: ip = mtod(m, struct ip *); @@ -835,11 
+841,11 @@ ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, __unreachable(); } if (!ipsec_accel_output_pad(m, sav, skip, mtu)) - return (false); + goto out; } if (!ipsec_accel_output_tag(m, i->drv_spi)) - return (false); + goto out; ipsec_accel_sa_recordxfer(sav, m); key_freesav(&sav); @@ -848,7 +854,18 @@ ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, *hwassist = ifp->if_ipsec_accel_m->if_hwassist(ifp, sav, i->drv_spi, i->ifdata); - return (true); + res = true; +out: + if (inp != NULL && inp->inp_pcbinfo == &V_tcbinfo) { + INP_WLOCK_ASSERT(inp); + tp = (struct tcpcb *)inp; + if (res && (*hwassist & (CSUM_TSO | CSUM_IP6_TSO)) != 0) { + tp->t_flags2 |= TF2_IPSEC_TSO; + } else { + tp->t_flags2 &= ~TF2_IPSEC_TSO; + } + } + return (res); } struct ipsec_accel_in_tag * From 7a296a86d1317c79de5980b8346cb7c9f87e6ddc Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 16 Jun 2024 23:50:15 +0300 Subject: [PATCH 19/25] IPSEC_OFFLOAD: add the option to GENERIC on amd64 and arm64 Sponsored by: NVIDIA networking --- sys/amd64/conf/GENERIC | 1 + sys/arm64/conf/std.arm64 | 1 + 2 files changed, 2 insertions(+) diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 49fe8fde0e81..923574adf943 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -30,6 +30,7 @@ options VIMAGE # Subsystem virtualization, e.g. 
VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 +options IPSEC_OFFLOAD # Inline ipsec offload infra options ROUTE_MPATH # Multipath routing support options FIB_ALGO # Modular fib lookups options TCP_OFFLOAD # TCP offload diff --git a/sys/arm64/conf/std.arm64 b/sys/arm64/conf/std.arm64 index cc4a5acbb314..69f333e32ed9 100644 --- a/sys/arm64/conf/std.arm64 +++ b/sys/arm64/conf/std.arm64 @@ -12,6 +12,7 @@ options INET # InterNETworking options INET6 # IPv6 communications protocols options CC_CUBIC # include CUBIC congestion control options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 +options IPSEC_OFFLOAD # Inline ipsec offload infra options ROUTE_MPATH # Multipath routing support options FIB_ALGO # Modular fib lookups options TCP_OFFLOAD # TCP offload From 7818c2d37c2c600fc9ad6f2a0951e50dd21b17c8 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Fri, 12 Jul 2024 11:28:35 +0000 Subject: [PATCH 20/25] armv6: Remove support for building armv6 With it planned that armv7 will be the only 32-bit kernel when 15.0 is released remove support for armv6. Remove the top level build infrastructure. It was already removed from universe, this just stops it from being built directly. Reviewed by: mmel, emaste Sponsored by: Arm Ltd Differential Revision: https://reviews.freebsd.org/D45634 --- Makefile | 6 +----- Makefile.inc1 | 1 - UPDATING | 3 +++ sys/sys/param.h | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index b2613b68b645..5c113d5b83cc 100644 --- a/Makefile +++ b/Makefile @@ -521,9 +521,6 @@ worlds: .PHONY # Don't build rarely used, semi-supported architectures unless requested. # .if defined(EXTRA_TARGETS) -# armv6's importance has waned enough to make building it the exception rather -# than the rule. 
-EXTRA_ARCHES_arm= armv6 # powerpcspe excluded from main list until clang fixed EXTRA_ARCHES_powerpc= powerpcspe .endif @@ -535,8 +532,7 @@ TARGET_ARCHES_${target}= ${MACHINE_ARCH_LIST_${target}} .if defined(USE_GCC_TOOLCHAINS) TOOLCHAINS_amd64= amd64-gcc12 -TOOLCHAINS_arm= armv6-gcc12 armv7-gcc12 -TOOLCHAIN_armv7= armv7-gcc12 +TOOLCHAINS_arm= armv7-gcc12 TOOLCHAINS_arm64= aarch64-gcc12 TOOLCHAINS_i386= i386-gcc12 TOOLCHAINS_powerpc= powerpc-gcc12 powerpc64-gcc12 diff --git a/Makefile.inc1 b/Makefile.inc1 index 2f442bc9a394..19ed923702b1 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -146,7 +146,6 @@ TARGET_TRIPLE_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:S/amd64/x86_64/}-${TARGET_TRIPLE_ABI}-freebsd${OS_REVISION} KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ - armv6/arm \ armv7/arm \ i386 \ powerpc \ diff --git a/UPDATING b/UPDATING index 2b6f9cb0d956..fc3abb285039 100644 --- a/UPDATING +++ b/UPDATING @@ -27,6 +27,9 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 15.x IS SLOW: world, or to merely disable the most expensive debugging functionality at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20240712: + Support for armv6 has been disconnected and is being removed. + 20240617: ifconfig now treats IPv4 addresses without a width or mask as an error. Specify the desired mask or width along with the IP address on the diff --git a/sys/sys/param.h b/sys/sys/param.h index 75370d5998fb..887c595a1de5 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -73,7 +73,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500019 +#define __FreeBSD_version 1500020 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, From 06999c8a3a3cbd254b8e52b25549e5f5222dafa6 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Fri, 12 Jul 2024 11:28:44 +0000 Subject: [PATCH 21/25] share/mk: Remove armv6 support It is being removed from the tree. 
Remove the build infrastructure to configure armv6 builds. Reviewed by: manu, imp, emaste Sponsored by: Arm Ltd Differential Revision: https://reviews.freebsd.org/D45643 --- share/mk/bsd.cpu.mk | 5 +---- share/mk/bsd.opts.mk | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/share/mk/bsd.cpu.mk b/share/mk/bsd.cpu.mk index 793c01b76dca..29eb4662dcdd 100644 --- a/share/mk/bsd.cpu.mk +++ b/share/mk/bsd.cpu.mk @@ -315,13 +315,10 @@ MACHINE_CPU = riscv ########## arm .if ${MACHINE_CPUARCH} == "arm" MACHINE_CPU += arm -. if ${MACHINE_ARCH:Marmv6*} != "" -MACHINE_CPU += armv6 -. endif . if ${MACHINE_ARCH:Marmv7*} != "" MACHINE_CPU += armv7 . endif -# Normally armv6 and armv7 are hard float ABI from FreeBSD 11 onwards. However +# Normally armv7 is hard float ABI from FreeBSD 11 onwards. However # when CPUTYPE has 'soft' in it, we use the soft-float ABI to allow building of # soft-float ABI libraries. In this case, we have to add the -mfloat-abi=softfp # to force that. diff --git a/share/mk/bsd.opts.mk b/share/mk/bsd.opts.mk index 18098c93605c..136215a2db47 100644 --- a/share/mk/bsd.opts.mk +++ b/share/mk/bsd.opts.mk @@ -95,7 +95,7 @@ __DEFAULT_DEPENDENT_OPTIONS = \ # means that ASLR is of limited effectiveness, and it may cause issues with # some memory-hungry workloads. # -.if ${MACHINE_ARCH} == "armv6" || ${MACHINE_ARCH} == "armv7" \ +.if ${MACHINE_ARCH} == "armv7" \ || ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" \ || ${MACHINE_ARCH} == "powerpcspe" __DEFAULT_NO_OPTIONS+= PIE From 97dbe3e7ace2c222b18f4eb787de91ee44192e76 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Fri, 12 Jul 2024 11:28:50 +0000 Subject: [PATCH 22/25] libclang_rt: Simplify the arm check We just need to check we are building for arm. 
Reviewed by: manu, emaste Sponsored by: Arm Ltd Differential Revision: https://reviews.freebsd.org/D45644 --- lib/libclang_rt/compiler-rt-vars.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libclang_rt/compiler-rt-vars.mk b/lib/libclang_rt/compiler-rt-vars.mk index 5a9f115697d9..c4a33309bc89 100644 --- a/lib/libclang_rt/compiler-rt-vars.mk +++ b/lib/libclang_rt/compiler-rt-vars.mk @@ -5,7 +5,7 @@ SANITIZER_SHAREDIR= ${CLANGDIR}/share # armv[67] is a bit special since we allow a soft-floating version via # CPUTYPE matching *soft*. This variant may not actually work though. -.if ${MACHINE_ARCH:Marmv[67]*} != "" && \ +.if ${MACHINE_CPUARCH} == "arm" && \ (!defined(CPUTYPE) || ${CPUTYPE:M*soft*} == "") CRTARCH?= armhf .else From f5c7644378f884dad473011a1f9350fed6fe4e4e Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Fri, 12 Jul 2024 11:28:57 +0000 Subject: [PATCH 23/25] sys/arm: Remove armv6 kernel configs Only the Raspberry Pi config was supported on armv6. Remove it in preparation for removing armv6 support from the kernel. 
Reviewed by: manu, emaste Sponsored by: Arm Ltd Differential Revision: https://reviews.freebsd.org/D45646 --- sys/arm/conf/RPI-B | 109 ----------------------------------------- sys/arm/conf/std.armv6 | 81 ------------------------------ 2 files changed, 190 deletions(-) delete mode 100644 sys/arm/conf/RPI-B delete mode 100644 sys/arm/conf/std.armv6 diff --git a/sys/arm/conf/RPI-B b/sys/arm/conf/RPI-B deleted file mode 100644 index d0a3ee13c367..000000000000 --- a/sys/arm/conf/RPI-B +++ /dev/null @@ -1,109 +0,0 @@ -# -# RPI-B -- Custom configuration for the Raspberry Pi -# -# For more information on this file, please read the config(5) manual page, -# and/or the handbook section on Kernel Configuration Files: -# -# https://docs.freebsd.org/en/books/handbook/kernelconfig/#kernelconfig-config -# -# The handbook is also available locally in /usr/share/doc/handbook -# if you've installed the doc distribution, otherwise always see the -# FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the -# latest information. -# -# An exhaustive list of options and more detailed explanations of the -# device lines is also present in the ../../conf/NOTES and NOTES files. -# If you are in doubt as to the purpose or necessity of a line, check first -# in NOTES. 
-# - -ident RPI-B - -include "std.armv6" -include "../broadcom/bcm2835/std.rpi" -include "../broadcom/bcm2835/std.bcm2835" - -options SCHED_4BSD # 4BSD scheduler -options PLATFORM - -# NFS root from boopt/dhcp -#options BOOTP -#options BOOTP_NFSROOT -#options BOOTP_COMPAT -#options BOOTP_NFSV3 -#options BOOTP_WIRED_TO=ue0 - -#options ROOTDEVNAME=\"ufs:mmcsd0s2\" - -# pseudo devices -device clk -device phy -device hwreset -device nvmem -device regulator -device syscon - -device bpf -device loop -device ether -device uart -device pty -device snp -device pl011 - -# Device mode support -device usb_template # Control of the gadget - -# Comment following lines for boot console on serial port -device vt -device kbdmux -device hkbd -device ukbd - -device sdhci -device mmc -device mmcsd - -device gpio -device gpioled - -# I2C -device iic -device iicbus -device bcm2835_bsc - -device md - -# USB support -device usb -device dwcotg # DWC OTG controller - -# USB storage support -device scbus -device da -device umass - -# USB ethernet support -device smscphy -device mii -device smsc - -# SPI -device spibus -device bcm2835_spi - -device vchiq -device sound - -device fdt_pinctrl - -# HID support -device hid # Generic HID support - -# Flattened Device Tree -options FDT # Configure using FDT/DTB data -# Note: DTB is normally loaded and modified by RPi boot loader, then -# handed to kernel via U-Boot and ubldr. -#options FDT_DTB_STATIC -#makeoptions FDT_DTS_FILE=rpi.dts -makeoptions MODULES_EXTRA="dtb/rpi rpi_ft5406" diff --git a/sys/arm/conf/std.armv6 b/sys/arm/conf/std.armv6 deleted file mode 100644 index a24227fc838f..000000000000 --- a/sys/arm/conf/std.armv6 +++ /dev/null @@ -1,81 +0,0 @@ -# Standard kernel config items for all ARMv6 systems. -# - -options HZ=1000 -options PREEMPTION # Enable kernel thread preemption -options VIMAGE # Subsystem virtualization, e.g. 
VNET -options INET # InterNETworking -options INET6 # IPv6 communications protocols -options CC_CUBIC # include CUBIC congestion control -options TCP_HHOOK # hhook(9) framework for TCP -device crypto # core crypto support -options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 -options NETLINK # netlink(4) support -options SCTP_SUPPORT # Allow kldload of SCTP -options FFS # Berkeley Fast Filesystem -options SOFTUPDATES # Enable FFS soft updates support -options UFS_ACL # Support for access control lists -options UFS_DIRHASH # Improve performance on big directories -options UFS_GJOURNAL # Enable gjournal-based UFS journaling -options QUOTA # Enable disk quotas for UFS -options NFSCL # Network Filesystem Client -options NFSLOCKD # Network Lock Manager -options NFS_ROOT # NFS usable as /, requires NFSCL -options MSDOSFS # MSDOS Filesystem -options CD9660 # ISO 9660 Filesystem -options PROCFS # Process filesystem (requires PSEUDOFS) -options PSEUDOFS # Pseudo-filesystem framework -options TMPFS # Efficient memory filesystem -options GEOM_PART_GPT # GUID Partition Tables -options GEOM_PART_BSD # BSD partition scheme -options GEOM_PART_MBR # MBR partition scheme -options GEOM_LABEL # Provides labelization -options COMPAT_43 # Compatible with BSD 4.3 [KEEP THIS!] -options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI -options KTRACE # ktrace(1) support -options STACK # stack(9) support -options SYSVSHM # SYSV-style shared memory -options SYSVMSG # SYSV-style message queues -options SYSVSEM # SYSV-style semaphores -options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions -options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. 
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) -options CAPABILITY_MODE # Capsicum capability mode -options CAPABILITIES # Capsicum capabilites -options FREEBSD_BOOT_LOADER # Process metadata passed from loader(8) -options VFP # Enable floating point hardware support -options MAC # Support for Mandatory Access Control (MAC) - -options COMPAT_FREEBSD10 # Compatible with FreeBSD10 -options COMPAT_FREEBSD11 # Compatible with FreeBSD11 -options COMPAT_FREEBSD12 # Compatible with FreeBSD12 -options COMPAT_FREEBSD13 # Compatible with FreeBSD13 -options COMPAT_FREEBSD14 # Compatible with FreeBSD14 - -# DTrace support -options KDTRACE_HOOKS # Kernel DTrace hooks -options DDB_CTF # all architectures - kernel ELF linker loads CTF data -makeoptions WITH_CTF=1 - -# Debugging support. Always need this: -makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols -options KDB # Enable kernel debugger support. -options KDB_TRACE # Print a stack trace for a panic. 
- -options USB_DEBUG # Enable usb debug support code - -# For full debugger support use (turn off in stable branch): -include "std.debug" - -# Optional extras, never enabled by default: -#options BOOTVERBOSE -#options DEBUG # May result in extreme spewage -#options KTR -#options KTR_COMPILE=KTR_ALL -#options KTR_ENTRIES=16384 -#options KTR_MASK=(KTR_SPARE2) -#options KTR_VERBOSE=0 -#options USB_REQ_DEBUG -#options USB_VERBOSE - From cb18ba9df52df247198c0f41979090686a0f8403 Mon Sep 17 00:00:00 2001 From: Alexander Ziaee Date: Thu, 4 Jul 2024 20:22:03 -0400 Subject: [PATCH 24/25] time.1: minor cleanup (alignment/macro/spdx) + shorter example filename to minimize line wrap + standards macro clarifying posix => posix.2 + align options + tag spdx Reviewed by: mhorne MFC after: 3 days Pull-Request: https://github.com/freebsd/freebsd-src/pull/1315 --- usr.bin/time/time.1 | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/usr.bin/time/time.1 b/usr.bin/time/time.1 index 6f70e8e52759..e6cfa6affaa1 100644 --- a/usr.bin/time/time.1 +++ b/usr.bin/time/time.1 @@ -1,3 +1,6 @@ +.\"- +.\" SPDX-License-Identifier: BSD-3-Clause +.\" .\" Copyright (c) 1980, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -56,7 +59,7 @@ the time used to execute the process and the time consumed by system overhead. .Pp The following options are available: -.Bl -tag -width indent +.Bl -tag -width "-o file" .It Fl a If the .Fl o @@ -143,8 +146,8 @@ file. 
Then execute the command again to make a new copy and add the result to the same file: .Bd -literal -offset indent -$ /usr/bin/time -o times.txt cp FreeBSD-12.1-RELEASE-amd64-bootonly.iso copy1.iso -$ /usr/bin/time -a -o times.txt cp FreeBSD-12.1-RELEASE-amd64-bootonly.iso copy2.iso +$ /usr/bin/time -o times.txt cp source.iso copy1.iso +$ /usr/bin/time -a -o times.txt cp source.iso copy2.iso .Ed .Pp The @@ -190,7 +193,8 @@ sys 0.00 .Sh STANDARDS The .Nm -utility is expected to conform to ISO/IEC 9945-2:1993 (``POSIX''). +utility is expected to conform to +.St -iso9945-2-93 .Sh HISTORY A .Nm From 6ac0f711ad9ecd9ac1525787bb08002c85c03cde Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 12 Jul 2024 11:36:40 -0400 Subject: [PATCH 25/25] nvmecontrol: Fix "Workloadd" typo MFC after: 1 week --- sbin/nvmecontrol/power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbin/nvmecontrol/power.c b/sbin/nvmecontrol/power.c index 57270e1995e3..b26570da0c52 100644 --- a/sbin/nvmecontrol/power.c +++ b/sbin/nvmecontrol/power.c @@ -90,7 +90,7 @@ power_list(struct nvme_controller_data *cdata) int i; printf("\nPower States Supported: %d\n\n", cdata->npss + 1); - printf(" # Max pwr Enter Lat Exit Lat RT RL WT WL Idle Pwr Act Pwr Workloadd\n"); + printf(" # Max pwr Enter Lat Exit Lat RT RL WT WL Idle Pwr Act Pwr Workload\n"); printf("-- -------- --------- --------- -- -- -- -- -------- -------- --\n"); for (i = 0; i <= cdata->npss; i++) power_list_one(i, &cdata->power_state[i]);