From 4605a99b51ab72351d7554fbadbb24985f4667b1 Mon Sep 17 00:00:00 2001
From: Andrew Gallatin
Date: Fri, 15 Nov 2024 10:41:34 -0500
Subject: [PATCH 1/2] aio: remove write-only jobid & kernelinfo

The jobid (which was stored in kernelinfo) was used to look up jobs
until 1ce9182407f6, where it became essentially write-only. Remove it
to simplify the code and pave the way for future work to make aio
scale better. Note this has been slated for removal "soon" for 18
years.

Suggested by: jhb
Reviewed by: kib
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D47583
---
 sys/kern/vfs_aio.c | 42 +-----------------------------------------
 sys/sys/aio.h      |  2 +-
 2 files changed, 2 insertions(+), 42 deletions(-)

diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index e7302f4b7a9e..eb08716fbeda 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -71,12 +71,6 @@
 #include
 #include
 
-/*
- * Counter for allocating reference ids to new jobs. Wrapped to 1 on
- * overflow. (XXX will be removed soon.)
- */
-static u_long jobrefid;
-
 /*
  * Counter for aio_fsync.
  */
@@ -297,7 +291,6 @@ struct aiocb_ops {
 	long	(*fetch_error)(struct aiocb *ujob);
 	int	(*store_status)(struct aiocb *ujob, long status);
 	int	(*store_error)(struct aiocb *ujob, long error);
-	int	(*store_kernelinfo)(struct aiocb *ujob, long jobref);
 	int	(*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob);
 };
 
@@ -418,7 +411,6 @@ aio_onceonly(void)
 	aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL,
 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	aiod_lifetime = AIOD_LIFETIME_DEFAULT;
-	jobrefid = 1;
 	p31b_setcfg(CTL_P1003_1B_ASYNCHRONOUS_IO, _POSIX_ASYNCHRONOUS_IO);
 	p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE);
 	p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0);
@@ -1455,13 +1447,6 @@ aiocb_store_error(struct aiocb *ujob, long error)
 	return (suword(&ujob->_aiocb_private.error, error));
 }
 
-static int
-aiocb_store_kernelinfo(struct aiocb *ujob, long jobref)
-{
-
-	return (suword(&ujob->_aiocb_private.kernelinfo, jobref));
-}
-
 static int
 aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
 {
@@ -1475,7 +1460,6 @@ static struct aiocb_ops aiocb_ops = {
 	.fetch_error = aiocb_fetch_error,
 	.store_status = aiocb_store_status,
 	.store_error = aiocb_store_error,
-	.store_kernelinfo = aiocb_store_kernelinfo,
 	.store_aiocb = aiocb_store_aiocb,
 };
 
@@ -1486,7 +1470,6 @@ static struct aiocb_ops aiocb_ops_osigevent = {
 	.fetch_error = aiocb_fetch_error,
 	.store_status = aiocb_store_status,
 	.store_error = aiocb_store_error,
-	.store_kernelinfo = aiocb_store_kernelinfo,
 	.store_aiocb = aiocb_store_aiocb,
 };
 #endif
@@ -1507,7 +1490,6 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
 	int opcode;
 	int error;
 	int fd, kqfd;
-	int jid;
 	u_short evflags;
 
 	if (p->p_aioinfo == NULL)
@@ -1517,7 +1499,6 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
 	ops->store_status(ujob, -1);
 	ops->store_error(ujob, 0);
-	ops->store_kernelinfo(ujob, -1);
 
 	if (num_queue_count >= max_queue_count ||
 	    ki->kaio_count >= max_aio_queue_per_proc) {
@@ -1630,16 +1611,8 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
 	job->fd_file = fp;
 
 	mtx_lock(&aio_job_mtx);
-	jid = jobrefid++;
 	job->seqno = jobseqno++;
 	mtx_unlock(&aio_job_mtx);
-	error = ops->store_kernelinfo(ujob, jid);
-	if (error) {
-		error = EINVAL;
-		goto err3;
-	}
-	job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid;
-
 	if (opcode == LIO_NOP) {
 		fdrop(fp, td);
 		MPASS(job->uiop == &job->uio || job->uiop == NULL);
@@ -2728,7 +2701,7 @@ filt_lio(struct knote *kn, long hint)
 struct __aiocb_private32 {
 	int32_t	status;
 	int32_t	error;
-	uint32_t kernelinfo;
+	uint32_t spare;
 };
 
 #ifdef COMPAT_FREEBSD6
@@ -2807,7 +2780,6 @@ aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
 	CP(job32, *kcb, aio_reqprio);
 	CP(job32, *kcb, _aiocb_private.status);
 	CP(job32, *kcb, _aiocb_private.error);
-	PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
 	return (convert_old_sigevent32(&job32.aio_sigevent,
 	    &kcb->aio_sigevent));
 }
@@ -2844,7 +2816,6 @@ aiocb32_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
 	CP(job32, *kcb, aio_reqprio);
 	CP(job32, *kcb, _aiocb_private.status);
 	CP(job32, *kcb, _aiocb_private.error);
-	PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
 	error = convert_sigevent32(&job32.aio_sigevent, &kcb->aio_sigevent);
 	return (error);
@@ -2886,15 +2857,6 @@ aiocb32_store_error(struct aiocb *ujob, long error)
 	return (suword32(&ujob32->_aiocb_private.error, error));
 }
 
-static int
-aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref)
-{
-	struct aiocb32 *ujob32;
-
-	ujob32 = (struct aiocb32 *)ujob;
-	return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref));
-}
-
 static int
 aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
 {
@@ -2908,7 +2870,6 @@ static struct aiocb_ops aiocb32_ops = {
 	.fetch_error = aiocb32_fetch_error,
 	.store_status = aiocb32_store_status,
 	.store_error = aiocb32_store_error,
-	.store_kernelinfo = aiocb32_store_kernelinfo,
 	.store_aiocb = aiocb32_store_aiocb,
 };
 
@@ -2919,7 +2880,6 @@ static struct aiocb_ops aiocb32_ops_osigevent = {
 	.fetch_error = aiocb32_fetch_error,
 	.store_status = aiocb32_store_status,
 	.store_error = aiocb32_store_error,
-	.store_kernelinfo = aiocb32_store_kernelinfo,
 	.store_aiocb = aiocb32_store_aiocb,
 };
 #endif
diff --git a/sys/sys/aio.h b/sys/sys/aio.h
index 919a6180b130..e979e0105e68 100644
--- a/sys/sys/aio.h
+++ b/sys/sys/aio.h
@@ -97,7 +97,7 @@
 struct __aiocb_private {
 	long	status;
 	long	error;
-	void	*kernelinfo;
+	void	*spare;
 };
 
 /*

From 12fc79619acaa3041f9c032bc9afe7d2005b942e Mon Sep 17 00:00:00 2001
From: Randall Stewart
Date: Fri, 15 Nov 2024 12:37:05 -0500
Subject: [PATCH 2/2] Change the SOCKBUF_LOCK calls to use the more refined
 SOCK_XXXBUF_LOCK/UNLOCK.

Socket buffer locking used to be done uniformly with SOCKBUF_LOCK/UNLOCK,
but we are now moving to the more refined SOCK_SENDBUF_LOCK/UNLOCK and
SOCK_RECVBUF_LOCK/UNLOCK. Let's get BBR and Rack to use these updated
macros.
Reviewed by: glebius, tuexen, rscheff
Differential Revision: https://reviews.freebsd.org/D47542
---
 sys/netinet/tcp_stacks/bbr.c  | 34 ++++++++++++++---------------
 sys/netinet/tcp_stacks/rack.c | 40 +++++++++++++++++------------------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 4ab12884b379..01a864f59bbe 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -7823,7 +7823,7 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		    (int)(ticks - tp->t_badrxtwin) < 0)
 			bbr_cong_signal(tp, th, CC_RTO_ERR, NULL);
 	}
-	SOCKBUF_LOCK(&so->so_snd);
+	SOCK_SENDBUF_LOCK(so);
 	acked_amount = min(acked, (int)sbavail(&so->so_snd));
 	tp->snd_wnd -= acked_amount;
 	mfree = sbcut_locked(&so->so_snd, acked_amount);
@@ -8295,7 +8295,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		thflags = tcp_get_flags(th) & TH_FIN;
 		KMOD_TCPSTAT_ADD(tcps_rcvpack, (int)nsegs);
 		KMOD_TCPSTAT_ADD(tcps_rcvbyte, tlen);
-		SOCKBUF_LOCK(&so->so_rcv);
+		SOCK_RECVBUF_LOCK(so);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			m_freem(m);
 		else
@@ -8528,7 +8528,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
 
 	/* Add data to socket buffer. */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_RECVBUF_LOCK(so);
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		m_freem(m);
 	} else {
@@ -12138,7 +12138,7 @@ again:
 		len = 0;
 		rsm = NULL;
 		if (flags & TH_RST) {
-			SOCKBUF_LOCK(sb);
+			SOCK_SENDBUF_LOCK(so);
 			goto send;
 		}
 recheck_resend:
@@ -12205,7 +12205,7 @@ recheck_resend:
 		} else {
 			/* Retransmitting SYN */
 			rsm = NULL;
-			SOCKBUF_LOCK(sb);
+			SOCK_SENDBUF_LOCK(so);
 			goto send;
 		}
 	} else
@@ -12304,7 +12304,7 @@ recheck_resend:
 		kern_prefetch(end_rsm, &prefetch_rsm);
 		prefetch_rsm = 1;
 	}
-	SOCKBUF_LOCK(sb);
+	SOCK_SENDBUF_LOCK(so);
 	/*
 	 * If snd_nxt == snd_max and we have transmitted a FIN, the
 	 * sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a
@@ -12721,7 +12721,7 @@ recheck_resend:
 	 * No reason to send a segment, just return.
 	 */
 just_return:
-	SOCKBUF_UNLOCK(sb);
+	SOCK_SENDBUF_UNLOCK(so);
 just_return_nolock:
 	if (tot_len)
 		slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);
@@ -12829,7 +12829,7 @@ send:
 			len--;
 		}
 	}
-	SOCKBUF_LOCK_ASSERT(sb);
+	SOCK_SENDBUF_LOCK_ASSERT(so);
 	if (len > 0) {
 		if ((tp->snd_una == tp->snd_max) &&
 		    (bbr_calc_time(cts, bbr->r_ctl.rc_went_idle_time) >= bbr_rtt_probe_time)) {
@@ -12945,7 +12945,7 @@ send:
 	if (tp->t_port) {
 		if (V_tcp_udp_tunneling_port == 0) {
 			/* The port was removed?? */
-			SOCKBUF_UNLOCK(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK(so);
 			return (EHOSTUNREACH);
 		}
 		hdrlen += sizeof(struct udphdr);
@@ -13036,7 +13036,7 @@ send:
 			 * byte of the payload can be put into the
 			 * TCP segment.
 			 */
-			SOCKBUF_UNLOCK(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK(so);
 			error = EMSGSIZE;
 			sack_rxmit = 0;
 			goto out;
@@ -13106,7 +13106,7 @@ send:
 		if (m == NULL) {
 			BBR_STAT_INC(bbr_failed_mbuf_aloc);
 			bbr_log_enobuf_jmp(bbr, len, cts, __LINE__, len, 0, 0);
-			SOCKBUF_UNLOCK(sb);
+			SOCK_SENDBUF_UNLOCK(so);
 			error = ENOBUFS;
 			sack_rxmit = 0;
 			goto out;
@@ -13150,7 +13150,7 @@ send:
 			 * is the only thing to do.
 			 */
 			BBR_STAT_INC(bbr_offset_drop);
-			SOCKBUF_UNLOCK(sb);
+			SOCK_SENDBUF_UNLOCK(so);
 			(void)m_free(m);
 			return (-EFAULT); /* tcp_drop() */
 		}
@@ -13210,7 +13210,7 @@ send:
 			tso = 0;
 		}
 		if (m->m_next == NULL) {
-			SOCKBUF_UNLOCK(sb);
+			SOCK_SENDBUF_UNLOCK(so);
 			(void)m_free(m);
 			error = ENOBUFS;
 			sack_rxmit = 0;
@@ -13246,9 +13246,9 @@ send:
 		    !(flags & TH_SYN)) {
 			flags |= TH_PUSH;
 		}
-		SOCKBUF_UNLOCK(sb);
+		SOCK_SENDBUF_UNLOCK(so);
 	} else {
-		SOCKBUF_UNLOCK(sb);
+		SOCK_SENDBUF_UNLOCK(so);
 		if (tp->t_flags & TF_ACKNOW)
 			KMOD_TCPSTAT_INC(tcps_sndacks);
 		else if (flags & (TH_SYN | TH_FIN | TH_RST))
@@ -13274,7 +13274,7 @@ send:
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 	}
-	SOCKBUF_UNLOCK_ASSERT(sb);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
@@ -13766,7 +13766,7 @@ nomore:
 	 * Everything else will just have to retransmit with the timer
 	 * (no pacer).
 	 */
-	SOCKBUF_UNLOCK_ASSERT(sb);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 	BBR_STAT_INC(bbr_saw_oerr);
 	/* Clear all delay/early tracks */
 	bbr->r_ctl.rc_hptsi_agg_delay = 0;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 30b0704ed7d6..9561c957bb00 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -11992,7 +11992,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			/* Must be non-newreno (cubic) getting too ahead of itself */
 			tp->snd_cwnd = p_cwnd;
 	}
-	SOCKBUF_LOCK(&so->so_snd);
+	SOCK_SENDBUF_LOCK(so);
 	acked_amount = min(acked, (int)sbavail(&so->so_snd));
 	tp->snd_wnd -= acked_amount;
 	mfree = sbcut_locked(&so->so_snd, acked_amount);
@@ -12378,7 +12378,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		thflags = tcp_get_flags(th) & TH_FIN;
 		KMOD_TCPSTAT_ADD(tcps_rcvpack, nsegs);
 		KMOD_TCPSTAT_ADD(tcps_rcvbyte, tlen);
-		SOCKBUF_LOCK(&so->so_rcv);
+		SOCK_RECVBUF_LOCK(so);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			m_freem(m);
 		} else {
@@ -12620,7 +12620,7 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
 
 	/* Add data to socket buffer. */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_RECVBUF_LOCK(so);
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		m_freem(m);
 	} else {
@@ -12779,7 +12779,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	struct mbuf *mfree;
 
 	rack_ack_received(tp, rack, th->th_ack, nsegs, CC_ACK, 0);
-	SOCKBUF_LOCK(&so->so_snd);
+	SOCK_SENDBUF_LOCK(so);
 	mfree = sbcut_locked(&so->so_snd, acked);
 	tp->snd_una = th->th_ack;
 	/* Note we want to hold the sb lock through the sendmap adjust */
@@ -16108,7 +16108,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
 			/* Must be non-newreno (cubic) getting too ahead of itself */
 			tp->snd_cwnd = p_cwnd;
 	}
-	SOCKBUF_LOCK(&so->so_snd);
+	SOCK_SENDBUF_LOCK(so);
 	mfree = sbcut_locked(&so->so_snd, acked_amount);
 	tp->snd_una = high_seq;
 	/* Note we want to hold the sb lock through the sendmap adjust */
@@ -19531,9 +19531,9 @@ again:
 		rack->r_fast_output = 0;
 		rack->r_ctl.fsb.left_to_send = 0;
 		/* At the end of fast_output scale up the sb */
-		SOCKBUF_LOCK(&rack->rc_inp->inp_socket->so_snd);
+		SOCK_SENDBUF_LOCK(rack->rc_inp->inp_socket);
 		rack_sndbuf_autoscale(rack);
-		SOCKBUF_UNLOCK(&rack->rc_inp->inp_socket->so_snd);
+		SOCK_SENDBUF_UNLOCK(rack->rc_inp->inp_socket);
 	}
 	if (tp->t_rtttime == 0) {
 		tp->t_rtttime = ticks;
@@ -20111,7 +20111,7 @@ again:
 		len = 0;
 		rsm = NULL;
 		if (flags & TH_RST) {
-			SOCKBUF_LOCK(&inp->inp_socket->so_snd);
+			SOCK_SENDBUF_LOCK(inp->inp_socket);
 			so = inp->inp_socket;
 			sb = &so->so_snd;
 			goto send;
@@ -20370,7 +20370,7 @@ again:
 		kern_prefetch(end_rsm, &prefetch_rsm);
 		prefetch_rsm = 1;
 	}
-	SOCKBUF_LOCK(sb);
+	SOCK_SENDBUF_LOCK(so);
 	if ((sack_rxmit == 0) &&
 	    (TCPS_HAVEESTABLISHED(tp->t_state) ||
 	    (tp->t_flags & TF_FASTOPEN))) {
@@ -20880,7 +20880,7 @@ dontupdate:
 	 * No reason to send a segment, just return.
 	 */
 just_return:
-	SOCKBUF_UNLOCK(sb);
+	SOCK_SENDBUF_UNLOCK(so);
 just_return_nolock:
 	{
 		int app_limited = CTF_JR_SENT_DATA;
@@ -21139,7 +21139,7 @@ send:
 			rack->r_ctl.rc_agg_early = 0;
 			rack->r_early = 0;
 			rack->r_late = 0;
-			SOCKBUF_UNLOCK(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK(so);
 			goto skip_all_send;
 		}
 	}
@@ -21356,7 +21356,7 @@ send:
 	if (tp->t_port) {
 		if (V_tcp_udp_tunneling_port == 0) {
 			/* The port was removed?? */
-			SOCKBUF_UNLOCK(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK(so);
 #ifdef TCP_ACCOUNTING
 			crtsc = get_cyclecount();
 			if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -21459,7 +21459,7 @@ send:
 			 * byte of the payload can be put into the
 			 * TCP segment.
 			 */
-			SOCKBUF_UNLOCK(&so->so_snd);
+			SOCK_SENDBUF_UNLOCK(so);
 			error = EMSGSIZE;
 			sack_rxmit = 0;
 			goto out;
@@ -21542,7 +21542,7 @@ send:
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
-			SOCKBUF_UNLOCK(sb);
+			SOCK_SENDBUF_UNLOCK(so);
 			error = ENOBUFS;
 			sack_rxmit = 0;
 			goto out;
@@ -21600,7 +21600,7 @@ send:
 			tso = 0;
 		}
 		if (m->m_next == NULL) {
-			SOCKBUF_UNLOCK(sb);
+			SOCK_SENDBUF_UNLOCK(so);
 			(void)m_free(m);
 			error = ENOBUFS;
 			sack_rxmit = 0;
@@ -21644,9 +21644,9 @@ send:
 			add_flag |= RACK_HAD_PUSH;
 		}
-		SOCKBUF_UNLOCK(sb);
+		SOCK_SENDBUF_UNLOCK(so);
 	} else {
-		SOCKBUF_UNLOCK(sb);
+		SOCK_SENDBUF_UNLOCK(so);
 		if (tp->t_flags & TF_ACKNOW)
 			KMOD_TCPSTAT_INC(tcps_sndacks);
 		else if (flags & (TH_SYN | TH_FIN | TH_RST))
@@ -21669,7 +21669,7 @@ send:
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 	}
-	SOCKBUF_UNLOCK_ASSERT(sb);
+	SOCK_SENDBUF_UNLOCK_ASSERT(so);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
@@ -22331,7 +22331,7 @@ out:
 			len = n_len;
 			sb_offset = tp->snd_max - tp->snd_una;
 			/* Re-lock for the next spin */
-			SOCKBUF_LOCK(sb);
+			SOCK_SENDBUF_LOCK(so);
 			goto send;
 		}
 	} else {
@@ -22350,7 +22350,7 @@ out:
 			len = n_len;
 			sb_offset = tp->snd_max - tp->snd_una;
 			/* Re-lock for the next spin */
-			SOCKBUF_LOCK(sb);
+			SOCK_SENDBUF_LOCK(so);
 			goto send;
 		}
 	}
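
Note on the first patch, for readers without vfs_aio.c at hand: aiocb_ops
is the per-ABI accessor table that lets the native and COMPAT_FREEBSD32
paths share aio_aqueue(). A minimal sketch of its shape after the change
(only the members visible in the hunks above are shown; the in-tree
struct has further members that the hunk context cuts off):

	struct aiocb_ops {
		long	(*fetch_error)(struct aiocb *ujob);
		int	(*store_status)(struct aiocb *ujob, long status);
		int	(*store_error)(struct aiocb *ujob, long error);
		int	(*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob);
		/*
		 * store_kernelinfo() is gone: the job id it copied out via
		 * suword() has not been read back since 1ce9182407f6.
		 */
	};

The net effect per queued request is one less counter (jobrefid) bumped
under aio_job_mtx and one less suword() copyout that could fail and turn
a queueable request into a spurious EINVAL.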
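
Note on the second patch: SOCKBUF_LOCK() takes a pointer to a sockbuf,
while the new macros take the socket and name the buffer whose lock is
wanted (in recent FreeBSD the buffer mutexes hang off struct socket
itself, which is why the socket is the natural argument). The conversion
is mechanical; a sketch modeled on the rack_fastack() hunk above, with
the unlock added for completeness:

	/* Before: the caller dereferences the buffer to find its lock. */
	SOCKBUF_LOCK(&so->so_snd);
	mfree = sbcut_locked(&so->so_snd, acked);
	SOCKBUF_UNLOCK(&so->so_snd);

	/* After: the macro names the buffer, so the socket is enough. */
	SOCK_SENDBUF_LOCK(so);
	mfree = sbcut_locked(&so->so_snd, acked);
	SOCK_SENDBUF_UNLOCK(so);

This is also why hunks that previously locked through a cached sb
pointer (sb = &so->so_snd) now pass so instead.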