[28449] | 1 | /* $Id: tcp_output.c 98103 2023-01-17 14:15:46Z vboxsync $ */
|
---|
| 2 | /** @file
|
---|
| 3 | * NAT - TCP output.
|
---|
| 4 | */
|
---|
| 5 |
|
---|
[1] | 6 | /*
|
---|
[98103] | 7 | * Copyright (C) 2006-2023 Oracle and/or its affiliates.
|
---|
[28449] | 8 | *
|
---|
[96407] | 9 | * This file is part of VirtualBox base platform packages, as
|
---|
| 10 | * available from https://www.virtualbox.org.
|
---|
| 11 | *
|
---|
| 12 | * This program is free software; you can redistribute it and/or
|
---|
| 13 | * modify it under the terms of the GNU General Public License
|
---|
| 14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
| 15 | * License.
|
---|
| 16 | *
|
---|
| 17 | * This program is distributed in the hope that it will be useful, but
|
---|
| 18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 20 | * General Public License for more details.
|
---|
| 21 | *
|
---|
| 22 | * You should have received a copy of the GNU General Public License
|
---|
| 23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
| 24 | *
|
---|
| 25 | * SPDX-License-Identifier: GPL-3.0-only
|
---|
[28449] | 26 | */
|
---|
| 27 |
|
---|
| 28 | /*
|
---|
| 29 | * This code is based on:
|
---|
| 30 | *
|
---|
[1] | 31 | * Copyright (c) 1982, 1986, 1988, 1990, 1993
|
---|
[14470] | 32 | * The Regents of the University of California. All rights reserved.
|
---|
[1] | 33 | *
|
---|
| 34 | * Redistribution and use in source and binary forms, with or without
|
---|
| 35 | * modification, are permitted provided that the following conditions
|
---|
| 36 | * are met:
|
---|
| 37 | * 1. Redistributions of source code must retain the above copyright
|
---|
| 38 | * notice, this list of conditions and the following disclaimer.
|
---|
| 39 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
| 40 | * notice, this list of conditions and the following disclaimer in the
|
---|
| 41 | * documentation and/or other materials provided with the distribution.
|
---|
[95573] | 42 | * 3. Neither the name of the University nor the names of its contributors
|
---|
[1] | 43 | * may be used to endorse or promote products derived from this software
|
---|
| 44 | * without specific prior written permission.
|
---|
| 45 | *
|
---|
| 46 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
---|
| 47 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
| 48 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
| 49 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
---|
| 50 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
| 51 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
| 52 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
| 53 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
| 54 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
| 55 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
| 56 | * SUCH DAMAGE.
|
---|
| 57 | *
|
---|
[14470] | 58 | * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93
|
---|
[1] | 59 | * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp
|
---|
| 60 | */
|
---|
| 61 |
|
---|
| 62 | /*
|
---|
| 63 | * Changes and additions relating to SLiRP
|
---|
| 64 | * Copyright (c) 1995 Danny Gasparovski.
|
---|
[1033] | 65 | *
|
---|
| 66 | * Please read the file COPYRIGHT for the
|
---|
[1] | 67 | * terms and conditions of the copyright.
|
---|
| 68 | */
|
---|
| 69 |
|
---|
| 70 | #include <slirp.h>
|
---|
| 71 |
|
---|
/*
 * Printable TCP state names, listed in TCP state order.
 *
 * The table is only used for the "stats socket" output, so it carries
 * descriptive names rather than the REAL ones: the first slot, which is
 * really "CLOSED", is shown as "REDIRECT".  All other slots keep their
 * real names.
 */
const char * const tcpstates[] =
{
    "REDIRECT",     /*  0 - really "CLOSED" */
    "LISTEN",       /*  1 */
    "SYN_SENT",     /*  2 */
    "SYN_RCVD",     /*  3 */
    "ESTABLISHED",  /*  4 */
    "CLOSE_WAIT",   /*  5 */
    "FIN_WAIT_1",   /*  6 */
    "CLOSING",      /*  7 */
    "LAST_ACK",     /*  8 */
    "FIN_WAIT_2",   /*  9 */
    "TIME_WAIT",    /* 10 */
};
|
---|
| 83 |
|
---|
[15074] | 84 | static const u_char tcp_outflags[TCP_NSTATES] =
|
---|
| 85 | {
|
---|
| 86 | TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK,
|
---|
| 87 | TH_ACK, TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK,
|
---|
| 88 | TH_FIN|TH_ACK, TH_ACK, TH_ACK,
|
---|
[1] | 89 | };
|
---|
| 90 |
|
---|
| 91 |
|
---|
[14470] | 92 | #define MAX_TCPOPTLEN 32 /* max # bytes that go in options */
|
---|
[1] | 93 |
|
---|
| 94 | /*
|
---|
| 95 | * Tcp output routine: figure out what should be sent and send it.
|
---|
| 96 | */
|
---|
| 97 | int
|
---|
[1033] | 98 | tcp_output(PNATState pData, register struct tcpcb *tp)
|
---|
[1] | 99 | {
|
---|
[15074] | 100 | register struct socket *so = tp->t_socket;
|
---|
| 101 | register long len, win;
|
---|
| 102 | int off, flags, error;
|
---|
[22896] | 103 | register struct mbuf *m = NULL;
|
---|
[15074] | 104 | register struct tcpiphdr *ti;
|
---|
| 105 | u_char opt[MAX_TCPOPTLEN];
|
---|
| 106 | unsigned optlen, hdrlen;
|
---|
| 107 | int idle, sendalot;
|
---|
[35163] | 108 | int size = 0;
|
---|
[1033] | 109 |
|
---|
[38110] | 110 | LogFlowFunc(("ENTER: tcp_output: tp = %R[tcpcb793]\n", tp));
|
---|
[1033] | 111 |
|
---|
[15074] | 112 | /*
|
---|
| 113 | * Determine length of data that should be transmitted,
|
---|
| 114 | * and flags that will be used.
|
---|
| 115 | * If there is some data or critical controls (SYN, RST)
|
---|
| 116 | * to send, then transmit; otherwise, investigate further.
|
---|
| 117 | */
|
---|
| 118 | idle = (tp->snd_max == tp->snd_una);
|
---|
| 119 | if (idle && tp->t_idle >= tp->t_rxtcur)
|
---|
[14470] | 120 | /*
|
---|
[15074] | 121 | * We have been idle for "a while" and no acks are
|
---|
| 122 | * expected to clock out any data we send --
|
---|
| 123 | * slow start to get ack "clock" running again.
|
---|
[14470] | 124 | */
|
---|
[15074] | 125 | tp->snd_cwnd = tp->t_maxseg;
|
---|
| 126 |
|
---|
[1] | 127 | again:
|
---|
[15074] | 128 | sendalot = 0;
|
---|
| 129 | off = tp->snd_nxt - tp->snd_una;
|
---|
| 130 | win = min(tp->snd_wnd, tp->snd_cwnd);
|
---|
[1] | 131 |
|
---|
[15074] | 132 | flags = tcp_outflags[tp->t_state];
|
---|
[1033] | 133 |
|
---|
[34103] | 134 | Log2((" --- tcp_output flags = 0x%x\n", flags));
|
---|
[1033] | 135 |
|
---|
[15074] | 136 | /*
|
---|
| 137 | * If in persist timeout with window of 0, send 1 byte.
|
---|
| 138 | * Otherwise, if window is small but nonzero
|
---|
| 139 | * and timer expired, we will send what we can
|
---|
| 140 | * and go to transmit state.
|
---|
| 141 | */
|
---|
| 142 | if (tp->t_force)
|
---|
| 143 | {
|
---|
| 144 | if (win == 0)
|
---|
| 145 | {
|
---|
| 146 | /*
|
---|
| 147 | * If we still have some data to send, then
|
---|
| 148 | * clear the FIN bit. Usually this would
|
---|
| 149 | * happen below when it realizes that we
|
---|
| 150 | * aren't sending all the data. However,
|
---|
| 151 | * if we have exactly 1 byte of unset data,
|
---|
| 152 | * then it won't clear the FIN bit below,
|
---|
| 153 | * and if we are in persist state, we wind
|
---|
| 154 | * up sending the packet without recording
|
---|
| 155 | * that we sent the FIN bit.
|
---|
| 156 | *
|
---|
| 157 | * We can't just blindly clear the FIN bit,
|
---|
| 158 | * because if we don't have any more data
|
---|
| 159 | * to send then the probe will be the FIN
|
---|
| 160 | * itself.
|
---|
| 161 | */
|
---|
[30045] | 162 | if (off < SBUF_LEN(&so->so_snd))
|
---|
[15074] | 163 | flags &= ~TH_FIN;
|
---|
| 164 | win = 1;
|
---|
[14470] | 165 | }
|
---|
[15074] | 166 | else
|
---|
| 167 | {
|
---|
| 168 | tp->t_timer[TCPT_PERSIST] = 0;
|
---|
| 169 | tp->t_rxtshift = 0;
|
---|
[14470] | 170 | }
|
---|
[15074] | 171 | }
|
---|
[1033] | 172 |
|
---|
[30045] | 173 | len = min(SBUF_LEN(&so->so_snd), win) - off;
|
---|
[15074] | 174 | if (len < 0)
|
---|
| 175 | {
|
---|
[14470] | 176 | /*
|
---|
[15074] | 177 | * If FIN has been sent but not acked,
|
---|
| 178 | * but we haven't been called to retransmit,
|
---|
| 179 | * len will be -1. Otherwise, window shrank
|
---|
| 180 | * after we sent into it. If window shrank to 0,
|
---|
| 181 | * cancel pending retransmit and pull snd_nxt
|
---|
| 182 | * back to (closed) window. We will enter persist
|
---|
| 183 | * state below. If the window didn't close completely,
|
---|
| 184 | * just wait for an ACK.
|
---|
[14470] | 185 | */
|
---|
[15074] | 186 | len = 0;
|
---|
| 187 | if (win == 0)
|
---|
| 188 | {
|
---|
| 189 | tp->t_timer[TCPT_REXMT] = 0;
|
---|
| 190 | tp->snd_nxt = tp->snd_una;
|
---|
[14470] | 191 | }
|
---|
[15074] | 192 | }
|
---|
| 193 | if (len > tp->t_maxseg)
|
---|
| 194 | {
|
---|
| 195 | len = tp->t_maxseg;
|
---|
| 196 | sendalot = 1;
|
---|
| 197 | }
|
---|
[30045] | 198 | if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + SBUF_LEN(&so->so_snd)))
|
---|
[15074] | 199 | flags &= ~TH_FIN;
|
---|
[1] | 200 |
|
---|
[15074] | 201 | win = sbspace(&so->so_rcv);
|
---|
[1] | 202 |
|
---|
[15074] | 203 | /*
|
---|
| 204 | * Sender silly window avoidance. If connection is idle
|
---|
| 205 | * and can send all data, a maximum segment,
|
---|
| 206 | * at least a maximum default-size segment do it,
|
---|
| 207 | * or are forced, do it; otherwise don't bother.
|
---|
| 208 | * If peer's buffer is tiny, then send
|
---|
| 209 | * when window is at least half open.
|
---|
| 210 | * If retransmitting (possibly after persist timer forced us
|
---|
| 211 | * to send into a small window), then must resend.
|
---|
| 212 | */
|
---|
| 213 | if (len)
|
---|
| 214 | {
|
---|
| 215 | if (len == tp->t_maxseg)
|
---|
| 216 | goto send;
|
---|
| 217 | if ((1 || idle || tp->t_flags & TF_NODELAY) &&
|
---|
[30045] | 218 | len + off >= SBUF_LEN(&so->so_snd))
|
---|
[15074] | 219 | goto send;
|
---|
| 220 | if (tp->t_force)
|
---|
| 221 | goto send;
|
---|
| 222 | if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
|
---|
| 223 | goto send;
|
---|
| 224 | if (SEQ_LT(tp->snd_nxt, tp->snd_max))
|
---|
| 225 | goto send;
|
---|
| 226 | }
|
---|
[1] | 227 |
|
---|
[15074] | 228 | /*
|
---|
| 229 | * Compare available window to amount of window
|
---|
| 230 | * known to peer (as advertised window less
|
---|
| 231 | * next expected input). If the difference is at least two
|
---|
| 232 | * max size segments, or at least 50% of the maximum possible
|
---|
| 233 | * window, then want to send a window update to peer.
|
---|
| 234 | */
|
---|
| 235 | if (win > 0)
|
---|
| 236 | {
|
---|
[14470] | 237 | /*
|
---|
[15074] | 238 | * "adv" is the amount we can increase the window,
|
---|
| 239 | * taking into account that we are limited by
|
---|
| 240 | * TCP_MAXWIN << tp->rcv_scale.
|
---|
[14470] | 241 | */
|
---|
[51904] | 242 | long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale);
|
---|
| 243 | if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
|
---|
| 244 | adv -= tp->rcv_adv - tp->rcv_nxt;
|
---|
[1] | 245 |
|
---|
[15074] | 246 | if (adv >= (long) (2 * tp->t_maxseg))
|
---|
| 247 | goto send;
|
---|
[30045] | 248 | if (2 * adv >= (long) SBUF_SIZE(&so->so_rcv))
|
---|
[15074] | 249 | goto send;
|
---|
| 250 | }
|
---|
[1] | 251 |
|
---|
[15074] | 252 | /*
|
---|
| 253 | * Send if we owe peer an ACK.
|
---|
| 254 | */
|
---|
| 255 | if (tp->t_flags & TF_ACKNOW)
|
---|
| 256 | goto send;
|
---|
| 257 | if (flags & (TH_SYN|TH_RST))
|
---|
| 258 | goto send;
|
---|
| 259 | if (SEQ_GT(tp->snd_up, tp->snd_una))
|
---|
| 260 | goto send;
|
---|
| 261 | /*
|
---|
| 262 | * If our state indicates that FIN should be sent
|
---|
| 263 | * and we have not yet done so, or we're retransmitting the FIN,
|
---|
| 264 | * then we need to send.
|
---|
| 265 | */
|
---|
| 266 | if ( flags & TH_FIN
|
---|
| 267 | && ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
|
---|
| 268 | goto send;
|
---|
[1033] | 269 |
|
---|
[15074] | 270 | /*
|
---|
| 271 | * TCP window updates are not reliable, rather a polling protocol
|
---|
| 272 | * using ``persist'' packets is used to insure receipt of window
|
---|
| 273 | * updates. The three ``states'' for the output side are:
|
---|
| 274 | * idle not doing retransmits or persists
|
---|
| 275 | * persisting to move a small or zero window
|
---|
| 276 | * (re)transmitting and thereby not persisting
|
---|
| 277 | *
|
---|
| 278 | * tp->t_timer[TCPT_PERSIST]
|
---|
| 279 | * is set when we are in persist state.
|
---|
| 280 | * tp->t_force
|
---|
| 281 | * is set when we are called to send a persist packet.
|
---|
| 282 | * tp->t_timer[TCPT_REXMT]
|
---|
| 283 | * is set when we are retransmitting
|
---|
| 284 | * The output side is idle when both timers are zero.
|
---|
| 285 | *
|
---|
| 286 | * If send window is too small, there is data to transmit, and no
|
---|
| 287 | * retransmit or persist is pending, then go to persist state.
|
---|
| 288 | * If nothing happens soon, send when timer expires:
|
---|
| 289 | * if window is nonzero, transmit what we can,
|
---|
| 290 | * otherwise force out a byte.
|
---|
| 291 | */
|
---|
[30045] | 292 | if ( SBUF_LEN(&so->so_snd)
|
---|
[15074] | 293 | && tp->t_timer[TCPT_REXMT] == 0
|
---|
| 294 | && tp->t_timer[TCPT_PERSIST] == 0)
|
---|
| 295 | {
|
---|
| 296 | tp->t_rxtshift = 0;
|
---|
| 297 | tcp_setpersist(tp);
|
---|
| 298 | }
|
---|
[1] | 299 |
|
---|
[15074] | 300 | /*
|
---|
| 301 | * No reason to send a segment, just return.
|
---|
| 302 | */
|
---|
| 303 | tcpstat.tcps_didnuttin++;
|
---|
| 304 |
|
---|
[38110] | 305 | LogFlowFuncLeave();
|
---|
[15074] | 306 | return (0);
|
---|
| 307 |
|
---|
[1] | 308 | send:
|
---|
[38110] | 309 | LogFlowFunc(("send\n"));
|
---|
[15074] | 310 | /*
|
---|
| 311 | * Before ESTABLISHED, force sending of initial options
|
---|
| 312 | * unless TCP set not to do any options.
|
---|
| 313 | * NOTE: we assume that the IP/TCP header plus TCP options
|
---|
| 314 | * always fit in a single mbuf, leaving room for a maximum
|
---|
| 315 | * link header, i.e.
|
---|
| 316 | * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
|
---|
| 317 | */
|
---|
| 318 | optlen = 0;
|
---|
| 319 | hdrlen = sizeof (struct tcpiphdr);
|
---|
| 320 | if (flags & TH_SYN)
|
---|
| 321 | {
|
---|
| 322 | tp->snd_nxt = tp->iss;
|
---|
| 323 | if ((tp->t_flags & TF_NOOPT) == 0)
|
---|
| 324 | {
|
---|
| 325 | u_int16_t mss;
|
---|
[1] | 326 |
|
---|
[15074] | 327 | opt[0] = TCPOPT_MAXSEG;
|
---|
| 328 | opt[1] = 4;
|
---|
[25822] | 329 | mss = RT_H2N_U16((u_int16_t) tcp_mss(pData, tp, 0));
|
---|
[15074] | 330 | memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss));
|
---|
| 331 | optlen = 4;
|
---|
[1] | 332 |
|
---|
[15074] | 333 | #if 0
|
---|
| 334 | if ( (tp->t_flags & TF_REQ_SCALE)
|
---|
| 335 | && ( (flags & TH_ACK) == 0
|
---|
| 336 | || (tp->t_flags & TF_RCVD_SCALE)))
|
---|
| 337 | {
|
---|
[25822] | 338 | *((u_int32_t *) (opt + optlen)) = RT_H2N_U32( TCPOPT_NOP << 24
|
---|
| 339 | | TCPOPT_WINDOW << 16
|
---|
| 340 | | TCPOLEN_WINDOW << 8
|
---|
| 341 | | tp->request_r_scale);
|
---|
[15074] | 342 | optlen += 4;
|
---|
| 343 | }
|
---|
| 344 | #endif
|
---|
[14470] | 345 | }
|
---|
[15074] | 346 | }
|
---|
[1033] | 347 |
|
---|
[15074] | 348 | /*
|
---|
| 349 | * Send a timestamp and echo-reply if this is a SYN and our side
|
---|
| 350 | * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
|
---|
| 351 | * and our peer have sent timestamps in our SYN's.
|
---|
| 352 | */
|
---|
| 353 | #if 0
|
---|
| 354 | if ( (tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP
|
---|
| 355 | && (flags & TH_RST) == 0
|
---|
| 356 | && ( (flags & (TH_SYN|TH_ACK)) == TH_SYN
|
---|
| 357 | || (tp->t_flags & TF_RCVD_TSTMP)))
|
---|
| 358 | {
|
---|
| 359 | u_int32_t *lp = (u_int32_t *)(opt + optlen);
|
---|
[1033] | 360 |
|
---|
[15074] | 361 | /* Form timestamp option as shown in appendix A of RFC 1323. */
|
---|
[25822] | 362 | *lp++ = RT_H2N_U32_C(TCPOPT_TSTAMP_HDR);
|
---|
| 363 | *lp++ = RT_H2N_U32(tcp_now);
|
---|
| 364 | *lp = RT_H2N_U32(tp->ts_recent);
|
---|
[15074] | 365 | optlen += TCPOLEN_TSTAMP_APPA;
|
---|
| 366 | }
|
---|
| 367 | #endif
|
---|
| 368 | hdrlen += optlen;
|
---|
[1] | 369 |
|
---|
[15074] | 370 | /*
|
---|
| 371 | * Adjust data length if insertion of options will
|
---|
| 372 | * bump the packet length beyond the t_maxseg length.
|
---|
| 373 | */
|
---|
| 374 | if (len > tp->t_maxseg - optlen)
|
---|
| 375 | {
|
---|
| 376 | len = tp->t_maxseg - optlen;
|
---|
| 377 | sendalot = 1;
|
---|
| 378 | }
|
---|
[1] | 379 |
|
---|
[15074] | 380 | /*
|
---|
| 381 | * Grab a header mbuf, attaching a copy of data to
|
---|
| 382 | * be transmitted, and initialize the header from
|
---|
| 383 | * the template for sends on this connection.
|
---|
| 384 | */
|
---|
| 385 | if (len)
|
---|
| 386 | {
|
---|
| 387 | if (tp->t_force && len == 1)
|
---|
| 388 | tcpstat.tcps_sndprobe++;
|
---|
| 389 | else if (SEQ_LT(tp->snd_nxt, tp->snd_max))
|
---|
| 390 | {
|
---|
| 391 | tcpstat.tcps_sndrexmitpack++;
|
---|
| 392 | tcpstat.tcps_sndrexmitbyte += len;
|
---|
| 393 | }
|
---|
| 394 | else
|
---|
| 395 | {
|
---|
| 396 | tcpstat.tcps_sndpack++;
|
---|
| 397 | tcpstat.tcps_sndbyte += len;
|
---|
| 398 | }
|
---|
[1033] | 399 |
|
---|
[26404] | 400 | size = MCLBYTES;
|
---|
[23154] | 401 | if ((len + hdrlen + ETH_HLEN) < MSIZE)
|
---|
| 402 | size = MCLBYTES;
|
---|
| 403 | else if ((len + hdrlen + ETH_HLEN) < MCLBYTES)
|
---|
| 404 | size = MCLBYTES;
|
---|
| 405 | else if((len + hdrlen + ETH_HLEN) < MJUM9BYTES)
|
---|
| 406 | size = MJUM9BYTES;
|
---|
| 407 | else if ((len + hdrlen + ETH_HLEN) < MJUM16BYTES)
|
---|
| 408 | size = MJUM16BYTES;
|
---|
| 409 | else
|
---|
| 410 | AssertMsgFailed(("Unsupported size"));
|
---|
| 411 | m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
|
---|
[15074] | 412 | if (m == NULL)
|
---|
| 413 | {
|
---|
| 414 | /* error = ENOBUFS; */
|
---|
| 415 | error = 1;
|
---|
| 416 | goto out;
|
---|
[14470] | 417 | }
|
---|
[15074] | 418 | m->m_data += if_maxlinkhdr;
|
---|
[23154] | 419 | m->m_pkthdr.header = mtod(m, void *);
|
---|
[15074] | 420 | m->m_len = hdrlen;
|
---|
[1] | 421 |
|
---|
[14470] | 422 | /*
|
---|
[15074] | 423 | * This will always succeed, since we make sure our mbufs
|
---|
| 424 | * are big enough to hold one MSS packet + header + ... etc.
|
---|
[14470] | 425 | */
|
---|
[15074] | 426 | #if 0
|
---|
| 427 | if (len <= MHLEN - hdrlen - max_linkhdr)
|
---|
| 428 | {
|
---|
| 429 | #endif
|
---|
| 430 | sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen);
|
---|
| 431 | m->m_len += len;
|
---|
| 432 | #if 0
|
---|
| 433 | }
|
---|
| 434 | else
|
---|
| 435 | {
|
---|
| 436 | m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
|
---|
| 437 | if (m->m_next == 0)
|
---|
| 438 | len = 0;
|
---|
| 439 | }
|
---|
| 440 | #endif
|
---|
[14470] | 441 | /*
|
---|
[15074] | 442 | * If we're sending everything we've got, set PUSH.
|
---|
| 443 | * (This will keep happy those implementations which only
|
---|
| 444 | * give data to the user when a buffer fills or
|
---|
| 445 | * a PUSH comes in.)
|
---|
[14470] | 446 | */
|
---|
[63013] | 447 | if (off + len == (ssize_t)SBUF_LEN(&so->so_snd))
|
---|
[15074] | 448 | flags |= TH_PUSH;
|
---|
| 449 | }
|
---|
| 450 | else
|
---|
| 451 | {
|
---|
[63668] | 452 | bool fUninitializedTemplate = false;
|
---|
[15074] | 453 | if (tp->t_flags & TF_ACKNOW)
|
---|
| 454 | tcpstat.tcps_sndacks++;
|
---|
| 455 | else if (flags & (TH_SYN|TH_FIN|TH_RST))
|
---|
| 456 | tcpstat.tcps_sndctrl++;
|
---|
| 457 | else if (SEQ_GT(tp->snd_up, tp->snd_una))
|
---|
| 458 | tcpstat.tcps_sndurg++;
|
---|
[14470] | 459 | else
|
---|
[15074] | 460 | tcpstat.tcps_sndwinup++;
|
---|
| 461 |
|
---|
[23154] | 462 | if ((hdrlen + ETH_HLEN) < MSIZE)
|
---|
| 463 | {
|
---|
| 464 | size = MCLBYTES;
|
---|
[23369] | 465 | }
|
---|
[23154] | 466 | else if ((hdrlen + ETH_HLEN) < MCLBYTES)
|
---|
| 467 | {
|
---|
| 468 | size = MCLBYTES;
|
---|
| 469 | }
|
---|
| 470 | else if((hdrlen + ETH_HLEN) < MJUM9BYTES)
|
---|
| 471 | {
|
---|
| 472 | size = MJUM9BYTES;
|
---|
| 473 | }
|
---|
| 474 | else if ((hdrlen + ETH_HLEN) < MJUM16BYTES)
|
---|
| 475 | {
|
---|
| 476 | size = MJUM16BYTES;
|
---|
| 477 | }
|
---|
| 478 | else
|
---|
| 479 | {
|
---|
| 480 | AssertMsgFailed(("Unsupported size"));
|
---|
| 481 | }
|
---|
| 482 | m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
|
---|
[22896] | 483 | if (m == NULL)
|
---|
| 484 | {
|
---|
| 485 | /* error = ENOBUFS; */
|
---|
| 486 | error = 1;
|
---|
| 487 | goto out;
|
---|
| 488 | }
|
---|
[23154] | 489 | m->m_data += if_maxlinkhdr;
|
---|
| 490 | m->m_pkthdr.header = mtod(m, void *);
|
---|
| 491 | m->m_len = hdrlen;
|
---|
[39884] | 492 | /*
|
---|
| 493 | * Uninitialized TCP template looks very suspicious at this processing state, thus why we have
|
---|
| 494 | * to workaround the problem till right fix. Warning appears once at release log.
|
---|
| 495 | */
|
---|
[63668] | 496 | fUninitializedTemplate = RT_BOOL(( tp->t_template.ti_src.s_addr == INADDR_ANY
|
---|
[39884] | 497 | || tp->t_template.ti_dst.s_addr == INADDR_ANY));
|
---|
| 498 | #ifndef DEBUG_vvl
|
---|
[63668] | 499 | if (fUninitializedTemplate)
|
---|
[39884] | 500 | {
|
---|
| 501 | static bool fWarn;
|
---|
| 502 | tcp_template(tp);
|
---|
| 503 | if(!fWarn)
|
---|
| 504 | {
|
---|
[58077] | 505 | LogRel(("NAT: TCP: TCP template was created forcely from socket information\n"));
|
---|
[39884] | 506 | fWarn = true;
|
---|
| 507 | }
|
---|
| 508 | }
|
---|
| 509 | #else
|
---|
[63668] | 510 | Assert((!fUninitializedTemplate));
|
---|
[39884] | 511 | #endif
|
---|
[15074] | 512 | }
|
---|
| 513 |
|
---|
| 514 | ti = mtod(m, struct tcpiphdr *);
|
---|
| 515 |
|
---|
| 516 | memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr));
|
---|
| 517 |
|
---|
| 518 | /*
|
---|
| 519 | * Fill in fields, remembering maximum advertised
|
---|
| 520 | * window for use in delaying messages about window sizes.
|
---|
| 521 | * If resending a FIN, be sure not to use a new sequence number.
|
---|
| 522 | */
|
---|
| 523 | if ( flags & TH_FIN
|
---|
| 524 | && tp->t_flags & TF_SENTFIN
|
---|
| 525 | && tp->snd_nxt == tp->snd_max)
|
---|
| 526 | tp->snd_nxt--;
|
---|
| 527 | /*
|
---|
| 528 | * If we are doing retransmissions, then snd_nxt will
|
---|
| 529 | * not reflect the first unsent octet. For ACK only
|
---|
| 530 | * packets, we do not want the sequence number of the
|
---|
| 531 | * retransmitted packet, we want the sequence number
|
---|
| 532 | * of the next unsent octet. So, if there is no data
|
---|
| 533 | * (and no SYN or FIN), use snd_max instead of snd_nxt
|
---|
| 534 | * when filling in ti_seq. But if we are in persist
|
---|
| 535 | * state, snd_max might reflect one byte beyond the
|
---|
| 536 | * right edge of the window, so use snd_nxt in that
|
---|
| 537 | * case, since we know we aren't doing a retransmission.
|
---|
| 538 | * (retransmit and persist are mutually exclusive...)
|
---|
| 539 | */
|
---|
| 540 | if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
|
---|
[25822] | 541 | ti->ti_seq = RT_H2N_U32(tp->snd_nxt);
|
---|
[15074] | 542 | else
|
---|
[25822] | 543 | ti->ti_seq = RT_H2N_U32(tp->snd_max);
|
---|
| 544 | ti->ti_ack = RT_H2N_U32(tp->rcv_nxt);
|
---|
[15074] | 545 | if (optlen)
|
---|
| 546 | {
|
---|
| 547 | memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen);
|
---|
[63013] | 548 | ti->ti_off = (uint8_t)((sizeof (struct tcphdr) + optlen) >> 2);
|
---|
[15074] | 549 | }
|
---|
| 550 | ti->ti_flags = flags;
|
---|
| 551 | /*
|
---|
| 552 | * Calculate receive window. Don't shrink window,
|
---|
| 553 | * but avoid silly window syndrome.
|
---|
| 554 | */
|
---|
[30045] | 555 | if (win < (long)(SBUF_SIZE(&so->so_rcv) / 4) && win < (long)tp->t_maxseg)
|
---|
[15074] | 556 | win = 0;
|
---|
| 557 | if (win > (long)TCP_MAXWIN << tp->rcv_scale)
|
---|
| 558 | win = (long)TCP_MAXWIN << tp->rcv_scale;
|
---|
[51904] | 559 | if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
|
---|
| 560 | win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
|
---|
[25822] | 561 | ti->ti_win = RT_H2N_U16((u_int16_t) (win>>tp->rcv_scale));
|
---|
[15074] | 562 |
|
---|
| 563 | #if 0
|
---|
| 564 | if (SEQ_GT(tp->snd_up, tp->snd_nxt))
|
---|
| 565 | {
|
---|
[25822] | 566 | ti->ti_urp = RT_H2N_U16((u_int16_t)(tp->snd_up - tp->snd_nxt));
|
---|
[15074] | 567 | #else
|
---|
| 568 | if (SEQ_GT(tp->snd_up, tp->snd_una))
|
---|
| 569 | {
|
---|
[25822] | 570 | ti->ti_urp = RT_H2N_U16((u_int16_t)(tp->snd_up - RT_N2H_U32(ti->ti_seq)));
|
---|
[15074] | 571 | #endif
|
---|
| 572 | ti->ti_flags |= TH_URG;
|
---|
| 573 | }
|
---|
| 574 | else
|
---|
[14470] | 575 | /*
|
---|
[15074] | 576 | * If no urgent pointer to send, then we pull
|
---|
| 577 | * the urgent pointer to the left edge of the send window
|
---|
| 578 | * so that it doesn't drift into the send window on sequence
|
---|
| 579 | * number wraparound.
|
---|
[14470] | 580 | */
|
---|
[15074] | 581 | tp->snd_up = tp->snd_una; /* drag it along */
|
---|
[1033] | 582 |
|
---|
[15074] | 583 | /*
|
---|
| 584 | * Put TCP length in extended header, and then
|
---|
| 585 | * checksum extended header and data.
|
---|
| 586 | */
|
---|
| 587 | if (len + optlen)
|
---|
[25822] | 588 | ti->ti_len = RT_H2N_U16((u_int16_t)(sizeof (struct tcphdr)
|
---|
| 589 | + optlen + len));
|
---|
[15074] | 590 | ti->ti_sum = cksum(m, (int)(hdrlen + len));
|
---|
[1] | 591 |
|
---|
[15074] | 592 | /*
|
---|
| 593 | * In transmit state, time the transmission and arrange for
|
---|
| 594 | * the retransmit. In persist state, just set snd_max.
|
---|
| 595 | */
|
---|
[23369] | 596 | if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0)
|
---|
[15074] | 597 | {
|
---|
| 598 | tcp_seq startseq = tp->snd_nxt;
|
---|
| 599 |
|
---|
[14470] | 600 | /*
|
---|
[15074] | 601 | * Advance snd_nxt over sequence space of this segment.
|
---|
[14470] | 602 | */
|
---|
[15074] | 603 | if (flags & (TH_SYN|TH_FIN))
|
---|
| 604 | {
|
---|
| 605 | if (flags & TH_SYN)
|
---|
| 606 | tp->snd_nxt++;
|
---|
| 607 | if (flags & TH_FIN)
|
---|
| 608 | {
|
---|
| 609 | tp->snd_nxt++;
|
---|
| 610 | tp->t_flags |= TF_SENTFIN;
|
---|
| 611 | }
|
---|
| 612 | }
|
---|
| 613 | tp->snd_nxt += len;
|
---|
| 614 | if (SEQ_GT(tp->snd_nxt, tp->snd_max))
|
---|
| 615 | {
|
---|
| 616 | tp->snd_max = tp->snd_nxt;
|
---|
| 617 | /*
|
---|
| 618 | * Time this transmission if not a retransmission and
|
---|
| 619 | * not currently timing anything.
|
---|
| 620 | */
|
---|
| 621 | if (tp->t_rtt == 0)
|
---|
| 622 | {
|
---|
| 623 | tp->t_rtt = 1;
|
---|
| 624 | tp->t_rtseq = startseq;
|
---|
| 625 | tcpstat.tcps_segstimed++;
|
---|
| 626 | }
|
---|
| 627 | }
|
---|
[1] | 628 |
|
---|
[14470] | 629 | /*
|
---|
[15074] | 630 | * Set retransmit timer if not currently set,
|
---|
| 631 | * and not doing an ack or a keep-alive probe.
|
---|
| 632 | * Initial value for retransmit timer is smoothed
|
---|
| 633 | * round-trip time + 2 * round-trip time variance.
|
---|
| 634 | * Initialize shift counter which is used for backoff
|
---|
| 635 | * of retransmit time.
|
---|
[14470] | 636 | */
|
---|
[15074] | 637 | if ( tp->t_timer[TCPT_REXMT] == 0
|
---|
| 638 | && tp->snd_nxt != tp->snd_una)
|
---|
| 639 | {
|
---|
| 640 | tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
|
---|
| 641 | if (tp->t_timer[TCPT_PERSIST])
|
---|
| 642 | {
|
---|
| 643 | tp->t_timer[TCPT_PERSIST] = 0;
|
---|
| 644 | tp->t_rxtshift = 0;
|
---|
| 645 | }
|
---|
| 646 | }
|
---|
| 647 | }
|
---|
| 648 | else
|
---|
| 649 | if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
|
---|
| 650 | tp->snd_max = tp->snd_nxt + len;
|
---|
[1] | 651 |
|
---|
[15074] | 652 | /*
|
---|
| 653 | * Fill in IP length and desired time to live and
|
---|
| 654 | * send to IP level. There should be a better way
|
---|
| 655 | * to handle ttl and tos; we could keep them in
|
---|
| 656 | * the template, but need a way to checksum without them.
|
---|
| 657 | */
|
---|
[22896] | 658 | M_ASSERTPKTHDR(m);
|
---|
| 659 | m->m_pkthdr.header = mtod(m, void *);
|
---|
[23154] | 660 | m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */
|
---|
[1] | 661 |
|
---|
| 662 | {
|
---|
[14470] | 663 | ((struct ip *)ti)->ip_len = m->m_len;
|
---|
| 664 | ((struct ip *)ti)->ip_ttl = ip_defttl;
|
---|
[22896] | 665 | ((struct ip *)ti)->ip_tos = so->so_iptos;
|
---|
[1033] | 666 |
|
---|
[15074] | 667 | /* #if BSD >= 43 */
|
---|
[14470] | 668 | /* Don't do IP options... */
|
---|
[15074] | 669 | #if 0
|
---|
| 670 | error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
|
---|
| 671 | so->so_options & SO_DONTROUTE, 0);
|
---|
| 672 | #endif
|
---|
[14470] | 673 | error = ip_output(pData, so, m);
|
---|
[1] | 674 |
|
---|
[15074] | 675 | #if 0
|
---|
| 676 | /* #else */
|
---|
| 677 | error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route,
|
---|
| 678 | so->so_options & SO_DONTROUTE);
|
---|
| 679 | /* #endif */
|
---|
| 680 | #endif
|
---|
[1] | 681 | }
|
---|
[15074] | 682 | if (error)
|
---|
| 683 | {
|
---|
[1] | 684 | out:
|
---|
[15074] | 685 | #if 0
|
---|
| 686 | if (error == ENOBUFS)
|
---|
| 687 | {
|
---|
| 688 | tcp_quench(tp->t_inpcb, 0);
|
---|
| 689 | return (0);
|
---|
[14470] | 690 | }
|
---|
[1] | 691 |
|
---|
[15074] | 692 | if ( ( error == EHOSTUNREACH
|
---|
| 693 | || error == ENETDOWN)
|
---|
| 694 | && TCPS_HAVERCVDSYN(tp->t_state))
|
---|
| 695 | {
|
---|
| 696 | tp->t_softerror = error;
|
---|
| 697 | return (0);
|
---|
| 698 | }
|
---|
| 699 | #endif
|
---|
[23462] | 700 | if (m != NULL)
|
---|
[28443] | 701 | m_freem(pData, m);
|
---|
[15074] | 702 | return (error);
|
---|
| 703 | }
|
---|
| 704 | tcpstat.tcps_sndtotal++;
|
---|
[1] | 705 |
|
---|
[15074] | 706 | /*
|
---|
| 707 | * Data sent (as far as we can tell).
|
---|
| 708 | * If this advertises a larger window than any other segment,
|
---|
| 709 | * then remember the size of the advertised window.
|
---|
| 710 | * Any pending ACK has now been sent.
|
---|
| 711 | */
|
---|
| 712 | if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
|
---|
| 713 | tp->rcv_adv = tp->rcv_nxt + win;
|
---|
| 714 | tp->last_ack_sent = tp->rcv_nxt;
|
---|
| 715 | tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
|
---|
| 716 | if (sendalot)
|
---|
| 717 | goto again;
|
---|
| 718 |
|
---|
| 719 | return (0);
|
---|
[1] | 720 | }
|
---|
| 721 |
|
---|
| 722 | void
|
---|
[15074] | 723 | tcp_setpersist(struct tcpcb *tp)
|
---|
[1] | 724 | {
|
---|
| 725 | int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
|
---|
| 726 |
|
---|
[15074] | 727 | #if 0
|
---|
| 728 | if (tp->t_timer[TCPT_REXMT])
|
---|
| 729 | panic("tcp_output REXMT");
|
---|
| 730 | #endif
|
---|
| 731 | /*
|
---|
| 732 | * Start/restart persistence timer.
|
---|
| 733 | */
|
---|
| 734 | TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
|
---|
| 735 | t * tcp_backoff[tp->t_rxtshift],
|
---|
| 736 | TCPTV_PERSMIN, TCPTV_PERSMAX);
|
---|
| 737 | if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
|
---|
| 738 | tp->t_rxtshift++;
|
---|
[1] | 739 | }
|
---|