Review notes: line structure of this patch was restored from a whitespace
collapsed copy; indentation and blank context lines are reconstructed and may
need adjusting.  The header names in the first tcp_input.c hunk were lost and
have to be restored before the patch can be applied.

Index: sys/dev/pci/if_ixl.c
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -u -p -r1.102 if_ixl.c
--- sys/dev/pci/if_ixl.c	30 Oct 2024 18:02:45 -0000	1.102
+++ sys/dev/pci/if_ixl.c	3 Apr 2025 14:33:52 -0000
@@ -883,6 +883,8 @@ struct ixl_rx_wb_desc_16 {
 
 #define IXL_RX_DESC_PTYPE_SHIFT		30
 #define IXL_RX_DESC_PTYPE_MASK		(0xffULL << IXL_RX_DESC_PTYPE_SHIFT)
+#define IXL_RX_DESC_PTYPE_MAC_IPV4_TCP	26
+#define IXL_RX_DESC_PTYPE_MAC_IPV6_TCP	92
 
 #define IXL_RX_DESC_PLEN_SHIFT		38
 #define IXL_RX_DESC_PLEN_MASK		(0x3fffULL << IXL_RX_DESC_PLEN_SHIFT)
@@ -1975,6 +1977,11 @@ ixl_attach(struct device *parent, struct
 	    IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
 	    IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
 	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+	ifp->if_capabilities |= IFCAP_LRO;
+#if 0
+	/* for now tcplro at ixl(4) is default off */
+	ifp->if_xflags |= IFXF_LRO;
+#endif
 
 	ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status);
 
@@ -2949,6 +2956,14 @@ ixl_start(struct ifqueue *ifq)
 			continue;
 		}
 
+		if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+			struct ether_extracted ext;
+
+			ether_extract_headers(m, &ext);
+			KASSERT(m->m_pkthdr.len == ext.iplen + (ext.evh ?
+			    sizeof(*ext.evh) : sizeof(*ext.eh)));
+		}
+
 		bus_dmamap_sync(sc->sc_dmat, map, 0,
 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
 
@@ -3255,9 +3270,11 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 	struct ixl_rx_map *rxm;
 	bus_dmamap_t map;
 	unsigned int cons, prod;
+	struct mbuf_list mltcp = MBUF_LIST_INITIALIZER();
 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 	struct mbuf *m;
 	uint64_t word;
+	unsigned int ptype;
 	unsigned int len;
 	unsigned int mask;
 	int done = 0;
@@ -3294,6 +3311,8 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 		m = rxm->rxm_m;
 		rxm->rxm_m = NULL;
 
+		ptype = (word & IXL_RX_DESC_PTYPE_MASK)
+		    >> IXL_RX_DESC_PTYPE_SHIFT;
 		len = (word & IXL_RX_DESC_PLEN_MASK) >> IXL_RX_DESC_PLEN_SHIFT;
 		m->m_len = len;
 		m->m_pkthdr.len = 0;
@@ -3314,7 +3333,6 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 					    lemtoh32(&rxd->filter_status);
 					m->m_pkthdr.csum_flags |= M_FLOWID;
 				}
-
 #if NVLAN > 0
 				if (ISSET(word, IXL_RX_DESC_L2TAG1P)) {
 					m->m_pkthdr.ether_vtag =
@@ -3322,9 +3340,14 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 					SET(m->m_flags, M_VLANTAG);
 				}
 #endif
-
 				ixl_rx_checksum(m, word);
-				ml_enqueue(&ml, m);
+
+				if (ISSET(ifp->if_xflags, IFXF_LRO) &&
+				    (ptype == IXL_RX_DESC_PTYPE_MAC_IPV4_TCP ||
+				    ptype == IXL_RX_DESC_PTYPE_MAC_IPV6_TCP))
+					tcp_enqueue_lro(&mltcp, m);
+				else
+					ml_enqueue(&ml, m);
 			} else {
 				ifp->if_ierrors++; /* XXX */
 				m_freem(m);
@@ -3341,8 +3364,14 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 	} while (cons != prod);
 
 	if (done) {
+		int livelocked = 0;
+
 		rxr->rxr_cons = cons;
+		if (ifiq_input(ifiq, &mltcp))
+			livelocked = 1;
 		if (ifiq_input(ifiq, &ml))
+			livelocked = 1;
+		if (livelocked)
 			if_rxr_livelocked(&rxr->rxr_acct);
 		ixl_rxfill(sc, rxr);
 	}
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -u -p -r1.434 tcp_input.c
--- sys/netinet/tcp_input.c	10 Mar 2025 15:11:46 -0000	1.434
+++ sys/netinet/tcp_input.c	3 Apr 2025 14:33:52 -0000
@@ -84,6 +84,7 @@
 #include
 
 #include
+#include
 #include
 #include
 #include
@@ -4229,4 +4230,251 @@ syn_cache_respond(struct syn_cache *sc, 
 	}
 	in_pcbunref(inp);
 	return (error);
+}
+
+/*
+ * Check if mbuf contains a TCP packet that could be merged.
+ * The packet needs checksums flagged as verified and a flow id, and has
+ * to carry payload with no TCP flags other than ACK and PUSH.
+ * Returns 1 if the packet qualifies, 0 otherwise.
+ */
+int
+tcp_mergeable_single(struct mbuf *m, struct ether_extracted *ext)
+{
+	/* Don't merge packets with invalid TCP checksum. */
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK))
+		return 0;
+	/* Don't merge packets without RSS hash. */
+	if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
+		return 0;
+
+	if (ext->ip4) {
+		/* Don't merge packets with invalid IP header checksum. */
+		if (!ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK))
+			return 0;
+		/* Don't merge IPv4 packets with option headers. */
+		if (ext->iphlen != sizeof(struct ip))
+			return 0;
+	}
+
+	/* Check TCP header. */
+	if (!ext->tcp)
+		return 0;
+
+	/* Don't merge empty segments. */
+	if (ext->paylen == 0)
+		return 0;
+	/* Just ACK and PUSH TCP flags are allowed. */
+	if (ISSET(ext->tcp->th_flags, TH_ACK|TH_PUSH) != ext->tcp->th_flags)
+		return 0;
+	/* TCP ACK flag has to be set. */
+	if (!ISSET(ext->tcp->th_flags, TH_ACK))
+		return 0;
+	return 1;
+}
+
+/*
+ * Check if head and tail are mergeable: same IP version, addresses and
+ * ports, tail starting at the sequence number where head ends, and the
+ * same sequence of TCP option kinds and lengths, made up of NOP and
+ * timestamp options only.  Returns 1 if mergeable, 0 otherwise.
+ */
+int
+tcp_mergeable_both(struct ether_extracted *head, struct ether_extracted *tail)
+{
+	/* Check IP header. */
+	if (head->ip4 && tail->ip4) {
+		/* Check IPv4 addresses. */
+		if (head->ip4->ip_src.s_addr != tail->ip4->ip_src.s_addr ||
+		    head->ip4->ip_dst.s_addr != tail->ip4->ip_dst.s_addr)
+			return 0;
+		/* Check max. IPv4 length. */
+		if (head->iplen + tail->iplen > IP_MAXPACKET)
+			return 0;
+	} else if (head->ip6 && tail->ip6) {
+		/* Check IPv6 addresses. */
+		if (!IN6_ARE_ADDR_EQUAL(&head->ip6->ip6_src,
+		    &tail->ip6->ip6_src) ||
+		    !IN6_ARE_ADDR_EQUAL(&head->ip6->ip6_dst,
+		    &tail->ip6->ip6_dst))
+			return 0;
+		/* Check max. IPv6 length. */
+		if ((head->iplen - head->iphlen) +
+		    (tail->iplen - tail->iphlen) > IPV6_MAXPACKET)
+			return 0;
+	} else
+		return 0;
+
+	/* Check TCP ports. */
+	if (head->tcp->th_sport != tail->tcp->th_sport ||
+	    head->tcp->th_dport != tail->tcp->th_dport)
+		return 0;
+	/* Check for contiguous segments. */
+	if (ntohl(head->tcp->th_seq) + head->paylen != ntohl(tail->tcp->th_seq))
+		return 0;
+
+	/* Ignore segments with different TCP options. */
+	if (head->tcphlen != tail->tcphlen)
+		return 0;
+	/* Check all TCP options */
+	if (head->tcphlen > sizeof(struct tcphdr)) {
+		u_char *hopt = (u_char *)head->tcp + sizeof(struct tcphdr);
+		u_char *topt = (u_char *)tail->tcp + sizeof(struct tcphdr);
+		int optsize = head->tcphlen - sizeof(struct tcphdr);
+		int optlen;
+
+		for (; optsize > 0; optsize -= optlen) {
+			/* Ignore segments with different TCP options. */
+			if (hopt[0] != topt[0])
+				return 0;
+			/* Get option length */
+			if (hopt[0] == TCPOPT_NOP) {
+				optlen = 1;
+			} else {
+				if (optsize < 2)
+					return 0; /* Illegal length */
+				optlen = hopt[1];
+				/* Length 0 would never advance this loop. */
+				if (optlen < 2 || optlen > optsize)
+					return 0; /* Illegal length */
+				if (hopt[1] != topt[1])
+					return 0;
+			}
+			if (hopt[0] != TCPOPT_NOP &&
+			    hopt[0] != TCPOPT_TIMESTAMP) {
+				return 0; /* Unsupported TCP option */
+			}
+			hopt += optlen;
+			topt += optlen;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Concatenation of head and tail mbuf.
+ * Trims ethernet padding off both packets, strips the protocol headers
+ * of mtail and appends the rest to the mbuf chain of mhead.  The IP
+ * length, TCP ack, window and PUSH flag and the packet header length of
+ * mhead are updated, and mhead gets flagged as TSO packet.
+ */
+void
+tcp_concatenate(struct mbuf *mhead, struct ether_extracted *head,
+    struct mbuf *mtail, struct ether_extracted *tail)
+{
+	struct mbuf *m;
+	unsigned int hdrlen, pktlen;
+
+	/*
+	 * Remove ethernet padding.  m_pkthdr.len covers the ethernet
+	 * header, iplen does not.
+	 */
+	pktlen = head->iplen +
+	    (head->evh ? sizeof(*head->evh) : sizeof(*head->eh));
+	if (mhead->m_pkthdr.len > pktlen)
+		m_adj(mhead, -(int)(mhead->m_pkthdr.len - pktlen));
+	/* Adjust IP header length. */
+	if (head->ip4) {
+		head->ip4->ip_len = htons(head->iplen + tail->paylen);
+	} else if (head->ip6) {
+		head->ip6->ip6_plen =
+		    htons(head->iplen - head->iphlen + tail->paylen);
+	}
+
+	/* Combine TCP flags from head and tail. */
+	if (ISSET(tail->tcp->th_flags, TH_PUSH))
+		SET(head->tcp->th_flags, TH_PUSH);
+	/* Adjust TCP header sequence space. */
+	head->tcp->th_ack = tail->tcp->th_ack;
+	head->tcp->th_win = tail->tcp->th_win;
+
+	/* Calculate header length of tail packet. */
+	hdrlen = sizeof(*tail->eh);
+	if (tail->evh)
+		hdrlen = sizeof(*tail->evh);
+	hdrlen += tail->iphlen;
+	hdrlen += tail->tcphlen;
+	/* Skip protocol headers in tail. */
+	m_adj(mtail, hdrlen);
+	/* Remove ethernet padding behind the payload of tail. */
+	if (mtail->m_pkthdr.len > tail->paylen)
+		m_adj(mtail, -(int)(mtail->m_pkthdr.len - tail->paylen));
+	CLR(mtail->m_flags, M_PKTHDR);
+
+	/* Concatenate */
+	for (m = mhead; m->m_next; m = m->m_next)
+		;
+	m->m_next = mtail;
+	mhead->m_pkthdr.len += tail->paylen;
+
+	/* Flag mbuf as TSO packet with MSS. */
+	if (!ISSET(mhead->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		/* Set CSUM_OUT flags in case of forwarding. */
+		SET(mhead->m_pkthdr.csum_flags, M_TCP_CSUM_OUT);
+		head->tcp->th_sum = 0;
+		if (head->ip4) {
+			SET(mhead->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT);
+			head->ip4->ip_sum = 0;
+		}
+		SET(mhead->m_pkthdr.csum_flags, M_TCP_TSO);
+		tcpstat_inc(tcps_inhwlro);
+		tcpstat_inc(tcps_inpktlro);	/* count head */
+	}
+	mhead->m_pkthdr.ph_mss = max(mhead->m_pkthdr.ph_mss, tail->paylen);
+	tcpstat_inc(tcps_inpktlro);	/* count tail */
+}
+
+/*
+ * Add mtail to the list ml.  If the list already holds a packet that
+ * mtail can be merged with, append mtail to that packet instead of
+ * enqueueing it.  ph_mss == 0 marks packets that are not mergeable.
+ */
+void
+tcp_enqueue_lro(struct mbuf_list *ml, struct mbuf *mtail)
+{
+	struct mbuf *mhead;
+	struct ether_extracted head, tail;
+
+	ether_extract_headers(mtail, &tail);
+	if (!tcp_mergeable_single(mtail, &tail)) {
+		mtail->m_pkthdr.ph_mss = 0;
+		goto dontmerge;
+	}
+	mtail->m_pkthdr.ph_mss = tail.paylen;
+
+	for (mhead = ml->ml_head; mhead != NULL; mhead = mhead->m_nextpkt) {
+		/* This packet was considered not mergeable before. */
+		if (mhead->m_pkthdr.ph_mss == 0)
+			continue;
+
+		/* Use RSS hash to skip packets of different connections. */
+		if (mhead->m_pkthdr.ph_flowid != mtail->m_pkthdr.ph_flowid)
+			continue;
+		/* Don't merge packets of different VLANs with offloading. */
+		if (ISSET(mhead->m_flags, M_VLANTAG) !=
+		    ISSET(mtail->m_flags, M_VLANTAG))
+			continue;
+		if (ISSET(mhead->m_flags, M_VLANTAG) &&
+		    EVL_VLANOFTAG(mhead->m_pkthdr.ether_vtag) !=
+		    EVL_VLANOFTAG(mtail->m_pkthdr.ether_vtag))
+			continue;
+
+		ether_extract_headers(mhead, &head);
+		/* Don't merge packets of different VLANs without offloading. */
+		if (head.evh && tail.evh) {
+			/* The tag in the frame is in network byte order. */
+			if (EVL_VLANOFTAG(ntohs(head.evh->evl_tag)) !=
+			    EVL_VLANOFTAG(ntohs(tail.evh->evl_tag)))
+				continue;
+		} else if (head.evh || tail.evh)
+			continue;
+
+		if (!tcp_mergeable_both(&head, &tail))
+			continue;
+
+		tcp_concatenate(mhead, &head, mtail, &tail);
+		return;
+	}
+ dontmerge:
+	ml_enqueue(ml, mtail);
 }
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/netinet/tcp_var.h,v
diff -u -p -u -p -r1.186 tcp_var.h
--- sys/netinet/tcp_var.h	2 Mar 2025 21:28:32 -0000	1.186
+++ sys/netinet/tcp_var.h	3 Apr 2025 14:33:52 -0000
@@ -720,6 +720,7 @@ void	 tcp_init(void);
 int	 tcp_input(struct mbuf **, int *, int, int, struct netstack *);
 int	 tcp_mss(struct tcpcb *, int);
 void	 tcp_mss_update(struct tcpcb *);
+void	 tcp_enqueue_lro(struct mbuf_list *, struct mbuf *);
 u_int	 tcp_hdrsz(struct tcpcb *);
 void	 tcp_mtudisc(struct inpcb *, int);
 void	 tcp_mtudisc_increase(struct inpcb *, int);