Index: sys/kern/uipc_socket2.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/kern/uipc_socket2.c,v diff -u -p -u -p -r1.164 uipc_socket2.c --- sys/kern/uipc_socket2.c 5 Jan 2025 12:36:48 -0000 1.164 +++ sys/kern/uipc_socket2.c 10 Jan 2025 22:29:28 -0000 @@ -220,6 +220,8 @@ sonewconn(struct socket *head, int conns */ if (persocket) solock(so); + else + rw_enter_write(&so->so_lock); /* * Inherit watermarks but those may get clamped in low mem situations. @@ -260,6 +262,8 @@ sonewconn(struct socket *head, int conns fail: if (persocket) sounlock(so); + else + rw_exit_write(&so->so_lock); sigio_free(&so->so_sigio); klist_free(&so->so_rcv.sb_klist); klist_free(&so->so_snd.sb_klist); Index: sys/netinet/in_proto.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet/in_proto.c,v diff -u -p -u -p -r1.121 in_proto.c --- sys/netinet/in_proto.c 5 Jan 2025 12:36:48 -0000 1.121 +++ sys/netinet/in_proto.c 10 Jan 2025 22:29:28 -0000 @@ -197,7 +197,8 @@ const struct protosw inetsw[] = { .pr_type = SOCK_STREAM, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_TCP, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE| + PR_MPINPUT, .pr_input = tcp_input, .pr_ctlinput = tcp_ctlinput, .pr_ctloutput = tcp_ctloutput, Index: sys/netinet/tcp_input.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_input.c,v diff -u -p -u -p -r1.421 tcp_input.c --- sys/netinet/tcp_input.c 9 Jan 2025 16:47:24 -0000 1.421 +++ sys/netinet/tcp_input.c 10 Jan 2025 22:29:28 -0000 @@ -605,6 +605,11 @@ findpcb: tcpstat_inc(tcps_noport); goto dropwithreset_ratelim; } + so = in_pcbsolock_ref(inp); + if (so == NULL) { + tcpstat_inc(tcps_noport); + goto dropwithreset_ratelim; + } KASSERT(sotoinpcb(inp->inp_socket) == inp); KASSERT(intotcpcb(inp) == NULL 
|| intotcpcb(inp)->t_inpcb == inp); @@ -637,7 +642,6 @@ findpcb: else tiwin = th->th_win; - so = inp->inp_socket; if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { union syn_cache_sa src; union syn_cache_sa dst; @@ -726,6 +730,7 @@ findpcb: * in use for the reply, * do not free it. */ + so = NULL; m = *mp = NULL; goto drop; } else { @@ -733,13 +738,11 @@ findpcb: * We have created a * full-blown connection. */ - tp = NULL; in_pcbunref(inp); inp = in_pcbref(sotoinpcb(so)); tp = intotcpcb(inp); if (tp == NULL) goto badsyn; /*XXX*/ - } break; @@ -845,6 +848,7 @@ findpcb: tcpstat_inc(tcps_dropsyn); goto drop; } + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1020,6 +1024,7 @@ findpcb: if (so->so_snd.sb_cc || tp->t_flags & TF_NEEDOUTPUT) (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1070,6 +1075,7 @@ findpcb: tp->t_flags &= ~TF_BLOCKOUTPUT; if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -1262,6 +1268,8 @@ trimthenstep6: ((arc4random() & 0x7fffffff) | 0x8000); reuse = &iss; tp = tcp_close(tp); + in_pcbsounlock_rele(inp, so); + so = NULL; in_pcbunref(inp); inp = NULL; goto findpcb; @@ -2062,6 +2070,7 @@ dodata: /* XXX */ */ if (tp->t_flags & (TF_ACKNOW|TF_NEEDOUTPUT)) (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2091,6 +2100,7 @@ dropafterack: m_freem(m); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2126,6 +2136,7 @@ dropwithreset: (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now); } m_freem(m); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; @@ -2137,6 +2148,7 @@ drop: tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen); m_freem(m); + in_pcbsounlock_rele(inp, so); in_pcbunref(inp); return IPPROTO_DONE; } @@ -3531,15 +3543,18 @@ 
syn_cache_get(struct sockaddr *src, stru struct inpcb *inp, *oldinp; struct tcpcb *tp = NULL; struct mbuf *am; - struct socket *oso; + struct socket *oldso; u_int rtableid; NET_ASSERT_LOCKED(); + inp = sotoinpcb(so); + mtx_enter(&syn_cache_mtx); sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid); if (sc == NULL) { mtx_leave(&syn_cache_mtx); + in_pcbsounlock_rele(inp, so); return (NULL); } @@ -3553,6 +3568,7 @@ syn_cache_get(struct sockaddr *src, stru refcnt_take(&sc->sc_refcnt); mtx_leave(&syn_cache_mtx); (void) syn_cache_respond(sc, m, now, do_ecn); + in_pcbsounlock_rele(inp, so); syn_cache_put(sc); return ((struct socket *)(-1)); } @@ -3567,12 +3583,13 @@ syn_cache_get(struct sockaddr *src, stru * connection when the SYN arrived. If we can't create * the connection, abort it. */ - oso = so; + oldso = so; + oldinp = inp; so = sonewconn(so, SS_ISCONNECTED, M_DONTWAIT); if (so == NULL) goto resetandabort; - - oldinp = sotoinpcb(oso); + soassertlocked(so); + soref(so); inp = sotoinpcb(so); #ifdef IPSEC @@ -3633,7 +3650,7 @@ syn_cache_get(struct sockaddr *src, stru (void) m_free(am); tp = intotcpcb(inp); - tp->t_flags = sototcpcb(oso)->t_flags & (TF_NOPUSH|TF_NODELAY); + tp->t_flags = sototcpcb(oldso)->t_flags & (TF_NOPUSH|TF_NODELAY); if (sc->sc_request_r_scale != 15) { tp->requested_s_scale = sc->sc_requested_s_scale; tp->request_r_scale = sc->sc_request_r_scale; @@ -3645,6 +3662,7 @@ syn_cache_get(struct sockaddr *src, stru tp->t_template = tcp_template(tp); if (tp->t_template == 0) { tp = tcp_drop(tp, ENOBUFS); /* destroys socket */ + in_pcbsounlock_rele(inp, so); so = NULL; goto abort; } @@ -3697,6 +3715,7 @@ syn_cache_get(struct sockaddr *src, stru tp->last_ack_sent = tp->rcv_nxt; tcpstat_inc(tcps_sc_completed); + in_pcbsounlock_rele(oldinp, oldso); syn_cache_put(sc); return (so); @@ -3707,8 +3726,10 @@ abort: m_freem(m); if (so != NULL) soabort(so); - syn_cache_put(sc); tcpstat_inc(tcps_sc_aborted); + in_pcbsounlock_rele(inp, so); + 
in_pcbsounlock_rele(oldinp, oldso); + syn_cache_put(sc); return ((struct socket *)(-1)); } @@ -3810,7 +3831,7 @@ syn_cache_add(struct sockaddr *src, stru struct syn_cache_head *scp; struct mbuf *ipopts; - NET_ASSERT_LOCKED(); + soassertlocked(so); tp = sototcpcb(so); @@ -3968,18 +3989,17 @@ syn_cache_add(struct sockaddr *src, stru if (syn_cache_respond(sc, m, now, do_ecn) == 0) { mtx_enter(&syn_cache_mtx); /* - * XXXSMP Currently exclusive netlock prevents another insert - * after our syn_cache_lookup() and before syn_cache_insert(). - * Double insert should be handled and not rely on netlock. + * Socket lock prevents another insert after our + * syn_cache_lookup() and before syn_cache_insert(). */ syn_cache_insert(sc, tp); mtx_leave(&syn_cache_mtx); tcpstat_inc(tcps_sndacks); tcpstat_inc(tcps_sndtotal); } else { + tcpstat_inc(tcps_sc_dropped); in_pcbunref(sc->sc_inplisten); syn_cache_put(sc); - tcpstat_inc(tcps_sc_dropped); } return (0); Index: sys/netinet/tcp_timer.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_timer.c,v diff -u -p -u -p -r1.80 tcp_timer.c --- sys/netinet/tcp_timer.c 5 Jan 2025 12:18:48 -0000 1.80 +++ sys/netinet/tcp_timer.c 10 Jan 2025 22:29:28 -0000 @@ -106,6 +106,36 @@ tcp_timer_init(void) tcp_delack_msecs = TCP_DELACK_MSECS; } +static inline int +tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp, + u_int timer) +{ + KASSERT(timer < TCPT_NTIMERS); + + NET_LOCK_SHARED(); + *so = in_pcbsolock_ref(inp); + if (*so == NULL) { + *tp = NULL; + return -1; + } + *tp = intotcpcb(inp); + /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ + if (*tp == NULL || !ISSET((*tp)->t_flags, TF_TIMER << timer) || + timeout_pending(&(*tp)->t_timer[timer])) + return -1; + CLR((*tp)->t_flags, TF_TIMER << timer); + + return 0; +} + +static inline void +tcp_timer_leave(struct inpcb *inp, struct socket *so) +{ + in_pcbsounlock_rele(inp, so); + NET_UNLOCK_SHARED(); + in_pcbunref(inp); +} + /* * Callout to process delayed ACKs for a TCPCB. */ @@ -113,6 +143,7 @@ void tcp_timer_delack(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; short ostate; @@ -121,15 +152,10 @@ tcp_timer_delack(void *arg) * for whatever reason, it will restart the delayed * ACK callout. */ - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) || - timeout_pending(&tp->t_timer[TCPT_DELACK])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_DELACK)) goto out; - CLR(tp->t_flags, TF_TMR_DELACK); - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -138,8 +164,7 @@ tcp_timer_delack(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } /* @@ -199,19 +224,15 @@ void tcp_timer_rexmt(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; - uint32_t rto; short ostate; + uint32_t rto; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) || - timeout_pending(&tp->t_timer[TCPT_REXMT])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_REXMT)) goto out; - CLR(tp->t_flags, TF_TMR_REXMT); - if ((tp->t_flags & TF_PMTUD_PEND) && inp && + if ((tp->t_flags & TF_PMTUD_PEND) && SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) { struct sockaddr_in sin; @@ -249,7 +270,7 @@ tcp_timer_rexmt(void *arg) tp->t_softerror : ETIMEDOUT); goto out; } - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -270,13 +291,13 @@ tcp_timer_rexmt(void *arg) * lots more sophisticated searching to find the right * value here... */ - if (ip_mtudisc && inp && + if (ip_mtudisc && TCPS_HAVEESTABLISHED(tp->t_state) && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { struct rtentry *rt = NULL; /* No data to send means path mtu is not a problem */ - if (!inp->inp_socket->so_snd.sb_cc) + if (!READ_ONCE(so->so_snd.sb_cc)) goto leave; rt = in_pcbrtentry(inp); @@ -391,31 +412,26 @@ tcp_timer_rexmt(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void tcp_timer_persist(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; - uint32_t rto; short ostate; uint64_t now; + uint32_t rto; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) || - timeout_pending(&tp->t_timer[TCPT_PERSIST])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_PERSIST)) goto out; - CLR(tp->t_flags, TF_TMR_PERSIST); if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) goto out; - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -445,26 +461,21 @@ tcp_timer_persist(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void tcp_timer_keep(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; short ostate; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) || - timeout_pending(&tp->t_timer[TCPT_KEEP])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_KEEP)) goto out; - CLR(tp->t_flags, TF_TMR_KEEP); - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -475,7 +486,7 @@ tcp_timer_keep(void *arg) goto out; } if ((atomic_load_int(&tcp_always_keepalive) || - inp->inp_socket->so_options & SO_KEEPALIVE) && + so->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { int maxidle; uint64_t now; @@ -509,28 +520,23 @@ tcp_timer_keep(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void tcp_timer_2msl(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; short ostate; - int maxidle; uint64_t now; + int maxidle; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) || - timeout_pending(&tp->t_timer[TCPT_2MSL])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_2MSL)) goto out; - CLR(tp->t_flags, TF_TMR_2MSL); - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -546,8 +552,7 @@ tcp_timer_2msl(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet6/in6_proto.c,v diff -u -p -u -p -r1.124 in6_proto.c --- sys/netinet6/in6_proto.c 5 Jan 2025 12:36:48 -0000 1.124 +++ sys/netinet6/in6_proto.c 10 Jan 2025 22:29:28 -0000 @@ -147,7 +147,8 @@ const struct protosw inet6sw[] = { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_TCP, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE| + PR_MPINPUT, .pr_input = tcp_input, .pr_ctlinput = tcp6_ctlinput, .pr_ctloutput = tcp_ctloutput, Index: sys/netinet6/nd6.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/netinet6/nd6.c,v diff -u -p -u -p -r1.283 nd6.c --- sys/netinet6/nd6.c 4 Sep 2024 07:54:52 -0000 1.283 +++ sys/netinet6/nd6.c 10 Jan 2025 22:29:28 -0000 @@ -709,7 +709,9 @@ nd6_nud_hint(struct rtentry *rt) struct llinfo_nd6 *ln; struct ifnet *ifp; + /* XXX NET_ASSERT_LOCKED_EXCLUSIVE(); + */ ifp = if_get(rt->rt_ifidx); if (ifp == NULL)