1 /**
2 * @file
3 * Transmission Control Protocol, incoming traffic
4 *
5 * The input processing functions of the TCP layer.
6 *
 * These functions are generally called in the order (ip_input() ->)
 * tcp_input() -> tcp_process() -> tcp_receive() (-> application).
9 *
10 */
11
12 /*
13 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without modification,
17 * are permitted provided that the following conditions are met:
18 *
19 * 1. Redistributions of source code must retain the above copyright notice,
20 * this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright notice,
22 * this list of conditions and the following disclaimer in the documentation
23 * and/or other materials provided with the distribution.
24 * 3. The name of the author may not be used to endorse or promote products
25 * derived from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
30 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
32 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
36 * OF SUCH DAMAGE.
37 *
38 * This file is part of the lwIP TCP/IP stack.
39 *
40 * Author: Adam Dunkels <adam@sics.se>
41 *
42 */
43
44 #include "lwip/opt.h"
45
46 #if LWIP_TCP /* don't build if not configured for use in lwipopts.h */
47
48 #include "lwip/priv/tcp_priv.h"
49 #include "lwip/def.h"
50 #include "lwip/ip_addr.h"
51 #include "lwip/netif.h"
52 #include "lwip/mem.h"
53 #include "lwip/memp.h"
54 #include "lwip/inet_chksum.h"
55 #include "lwip/stats.h"
56 #include "lwip/ip6.h"
57 #include "lwip/ip6_addr.h"
58 #if LWIP_ND6_TCP_REACHABILITY_HINTS
59 #include "lwip/nd6.h"
60 #endif /* LWIP_ND6_TCP_REACHABILITY_HINTS */
61
/** Initial congestion window: min(4*MSS, max(2*MSS, 4380 bytes)).
 * This is the initial-window formula of RFC 3390 (the successor of the
 * RFC 2581 rule). The expansion is a single parenthesized expression without
 * a trailing semicolon so that it can be used inside larger expressions. */
#define LWIP_TCP_CALC_INITIAL_CWND(mss) (LWIP_MIN((4U * (mss)), LWIP_MAX((2U * (mss)), 4380U)))
/** Initial slow start threshold value: we use the full window */
#define LWIP_TCP_INITIAL_SSTHRESH(pcb)  ((pcb)->snd_wnd)
66
/* These variables are global to all functions involved in the input
   processing of TCP segments. They are set by the tcp_input()
   function. */
/* The segment currently being processed; tcp_input() fills in next, len, p
   and tcphdr before calling tcp_process(). */
static struct tcp_seg inseg;
/* TCP header of the current segment; tcp_input() converts src, dest, seqno,
   ackno and wnd to host byte order in place. */
static struct tcp_hdr *tcphdr;
/* Total length of the TCP options (header length minus TCP_HLEN). */
static u16_t tcphdr_optlen;
/* Number of option bytes that are located in the first pbuf. */
static u16_t tcphdr_opt1len;
/* Start of the option bytes that continue in the second pbuf, or NULL if all
   options are in the first pbuf. */
static u8_t* tcphdr_opt2;
/* NOTE(review): not referenced in this part of the file - presumably the read
   index used while parsing options; confirm against tcp_parseopt(). */
static u16_t tcp_optidx;
/* Sequence and acknowledgment numbers of the current segment (host byte order). */
static u32_t seqno, ackno;
/* Reset to 0 by tcp_input() for every segment; if non-zero after tcp_process(),
   tcp_input() reports it to the application through TCP_EVENT_SENT. */
static tcpwnd_size_t recv_acked;
/* Payload length of the current segment, plus one if SYN or FIN is set. */
static u16_t tcplen;
/* TCP header flags of the current segment (TCPH_FLAGS(tcphdr)). */
static u8_t flags;

/* Result flags of processing the current segment (TF_RESET, TF_CLOSED,
   TF_GOT_FIN); evaluated by tcp_input() after tcp_process() returns. */
static u8_t recv_flags;
/* Data to hand to the application via TCP_EVENT_RECV, or NULL. */
static struct pbuf *recv_data;

/* The pcb that tcp_input() is currently processing; NULL outside of that. */
struct tcp_pcb *tcp_input_pcb;
85
/* Forward declarations. */
/* TCP state machine, called by tcp_input() for segments of active connections. */
static err_t tcp_process(struct tcp_pcb *pcb);
/* Data/ACK processing, called by tcp_process() in the data-carrying states. */
static void tcp_receive(struct tcp_pcb *pcb);
/* Option parsing, called for new connections and at the start of tcp_process(). */
static void tcp_parseopt(struct tcp_pcb *pcb);

/* Handlers for segments that match a LISTEN or TIME_WAIT pcb. */
static void tcp_listen_input(struct tcp_pcb_listen *pcb);
static void tcp_timewait_input(struct tcp_pcb *pcb);

#if LWIP_TCP_SACK_OUT
static void tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right);
static void tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq);
#endif /* LWIP_TCP_SACK_OUT */
98 /**
99 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
100 * the segment between the PCBs and passes it on to tcp_process(), which implements
101 * the TCP finite state machine. This function is called by the IP layer (in
102 * ip_input()).
103 *
104 * @param p received TCP segment to process (p->payload pointing to the TCP header)
105 * @param inp network interface on which this segment was received
106 */
107 void
tcp_input(struct pbuf * p,struct netif * inp)108 tcp_input(struct pbuf *p, struct netif *inp)
109 {
110 struct tcp_pcb *pcb, *prev;
111 struct tcp_pcb_listen *lpcb;
112 #if SO_REUSE
113 struct tcp_pcb *lpcb_prev = NULL;
114 struct tcp_pcb_listen *lpcb_any = NULL;
115 #endif /* SO_REUSE */
116 u8_t hdrlen_bytes;
117 err_t err;
118
119 LWIP_UNUSED_ARG(inp);
120
121 PERF_START;
122
123 TCP_STATS_INC(tcp.recv);
124 MIB2_STATS_INC(mib2.tcpinsegs);
125
126 tcphdr = (struct tcp_hdr *)p->payload;
127
128 #if TCP_INPUT_DEBUG
129 tcp_debug_print(tcphdr);
130 #endif
131
132 /* Check that TCP header fits in payload */
133 if (p->len < TCP_HLEN) {
134 /* drop short packets */
135 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: short packet (%"U16_F" bytes) discarded\n", p->tot_len));
136 TCP_STATS_INC(tcp.lenerr);
137 goto dropped;
138 }
139
140 /* Don't even process incoming broadcasts/multicasts. */
141 if (ip_addr_isbroadcast(ip_current_dest_addr(), ip_current_netif()) ||
142 ip_addr_ismulticast(ip_current_dest_addr())) {
143 TCP_STATS_INC(tcp.proterr);
144 goto dropped;
145 }
146
147 #if CHECKSUM_CHECK_TCP
148 IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_CHECK_TCP) {
149 /* Verify TCP checksum. */
150 u16_t chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len,
151 ip_current_src_addr(), ip_current_dest_addr());
152 if (chksum != 0) {
153 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F"\n",
154 chksum));
155 tcp_debug_print(tcphdr);
156 TCP_STATS_INC(tcp.chkerr);
157 goto dropped;
158 }
159 }
160 #endif /* CHECKSUM_CHECK_TCP */
161
162 /* sanity-check header length */
163 hdrlen_bytes = TCPH_HDRLEN(tcphdr) * 4;
164 if ((hdrlen_bytes < TCP_HLEN) || (hdrlen_bytes > p->tot_len)) {
165 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: invalid header length (%"U16_F")\n", (u16_t)hdrlen_bytes));
166 TCP_STATS_INC(tcp.lenerr);
167 goto dropped;
168 }
169
170 /* Move the payload pointer in the pbuf so that it points to the
171 TCP data instead of the TCP header. */
172 tcphdr_optlen = hdrlen_bytes - TCP_HLEN;
173 tcphdr_opt2 = NULL;
174 if (p->len >= hdrlen_bytes) {
175 /* all options are in the first pbuf */
176 tcphdr_opt1len = tcphdr_optlen;
177 pbuf_header(p, -(s16_t)hdrlen_bytes); /* cannot fail */
178 } else {
179 u16_t opt2len;
180 /* TCP header fits into first pbuf, options don't - data is in the next pbuf */
181 /* there must be a next pbuf, due to hdrlen_bytes sanity check above */
182 LWIP_ASSERT("p->next != NULL", p->next != NULL);
183
184 /* advance over the TCP header (cannot fail) */
185 pbuf_header(p, -TCP_HLEN);
186
187 /* determine how long the first and second parts of the options are */
188 tcphdr_opt1len = p->len;
189 opt2len = tcphdr_optlen - tcphdr_opt1len;
190
191 /* options continue in the next pbuf: set p to zero length and hide the
192 options in the next pbuf (adjusting p->tot_len) */
193 pbuf_header(p, -(s16_t)tcphdr_opt1len);
194
195 /* check that the options fit in the second pbuf */
196 if (opt2len > p->next->len) {
197 /* drop short packets */
198 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: options overflow second pbuf (%"U16_F" bytes)\n", p->next->len));
199 TCP_STATS_INC(tcp.lenerr);
200 goto dropped;
201 }
202
203 /* remember the pointer to the second part of the options */
204 tcphdr_opt2 = (u8_t*)p->next->payload;
205
206 /* advance p->next to point after the options, and manually
207 adjust p->tot_len to keep it consistent with the changed p->next */
208 pbuf_header(p->next, -(s16_t)opt2len);
209 p->tot_len -= opt2len;
210
211 LWIP_ASSERT("p->len == 0", p->len == 0);
212 LWIP_ASSERT("p->tot_len == p->next->tot_len", p->tot_len == p->next->tot_len);
213 }
214
215 /* Convert fields in TCP header to host byte order. */
216 tcphdr->src = lwip_ntohs(tcphdr->src);
217 tcphdr->dest = lwip_ntohs(tcphdr->dest);
218 seqno = tcphdr->seqno = lwip_ntohl(tcphdr->seqno);
219 ackno = tcphdr->ackno = lwip_ntohl(tcphdr->ackno);
220 tcphdr->wnd = lwip_ntohs(tcphdr->wnd);
221
222 flags = TCPH_FLAGS(tcphdr);
223 tcplen = p->tot_len + ((flags & (TCP_FIN | TCP_SYN)) ? 1 : 0);
224
225 /* Demultiplex an incoming segment. First, we check if it is destined
226 for an active connection. */
227 prev = NULL;
228
229 for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
230 LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED);
231 LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
232 LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN);
233 if (pcb->remote_port == tcphdr->src &&
234 pcb->local_port == tcphdr->dest &&
235 ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) &&
236 ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) {
237 /* Move this PCB to the front of the list so that subsequent
238 lookups will be faster (we exploit locality in TCP segment
239 arrivals). */
240 LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb->next != pcb);
241 if (prev != NULL) {
242 prev->next = pcb->next;
243 pcb->next = tcp_active_pcbs;
244 tcp_active_pcbs = pcb;
245 } else {
246 TCP_STATS_INC(tcp.cachehit);
247 }
248 LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb);
249 break;
250 }
251 prev = pcb;
252 }
253
254 if (pcb == NULL) {
255 /* If it did not go to an active connection, we check the connections
256 in the TIME-WAIT state. */
257 for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
258 LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
259 if (pcb->remote_port == tcphdr->src &&
260 pcb->local_port == tcphdr->dest &&
261 ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) &&
262 ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) {
263 /* We don't really care enough to move this PCB to the front
264 of the list since we are not very likely to receive that
265 many segments for connections in TIME-WAIT. */
266 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for TIME_WAITing connection.\n"));
267 tcp_timewait_input(pcb);
268 pbuf_free(p);
269 return;
270 }
271 }
272
273 /* Finally, if we still did not get a match, we check all PCBs that
274 are LISTENing for incoming connections. */
275 prev = NULL;
276 for (lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
277 if (lpcb->local_port == tcphdr->dest) {
278 if (IP_IS_ANY_TYPE_VAL(lpcb->local_ip)) {
279 /* found an ANY TYPE (IPv4/IPv6) match */
280 #if SO_REUSE
281 lpcb_any = lpcb;
282 lpcb_prev = prev;
283 #else /* SO_REUSE */
284 break;
285 #endif /* SO_REUSE */
286 } else if (IP_ADDR_PCB_VERSION_MATCH_EXACT(lpcb, ip_current_dest_addr())) {
287 if (ip_addr_cmp(&lpcb->local_ip, ip_current_dest_addr())) {
288 /* found an exact match */
289 break;
290 } else if (ip_addr_isany(&lpcb->local_ip)) {
291 /* found an ANY-match */
292 #if SO_REUSE
293 lpcb_any = lpcb;
294 lpcb_prev = prev;
295 #else /* SO_REUSE */
296 break;
297 #endif /* SO_REUSE */
298 }
299 }
300 }
301 prev = (struct tcp_pcb *)lpcb;
302 }
303 #if SO_REUSE
304 /* first try specific local IP */
305 if (lpcb == NULL) {
306 /* only pass to ANY if no specific local IP has been found */
307 lpcb = lpcb_any;
308 prev = lpcb_prev;
309 }
310 #endif /* SO_REUSE */
311 if (lpcb != NULL) {
312 /* Move this PCB to the front of the list so that subsequent
313 lookups will be faster (we exploit locality in TCP segment
314 arrivals). */
315 if (prev != NULL) {
316 ((struct tcp_pcb_listen *)prev)->next = lpcb->next;
317 /* our successor is the remainder of the listening list */
318 lpcb->next = tcp_listen_pcbs.listen_pcbs;
319 /* put this listening pcb at the head of the listening list */
320 tcp_listen_pcbs.listen_pcbs = lpcb;
321 } else {
322 TCP_STATS_INC(tcp.cachehit);
323 }
324
325 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for LISTENing connection.\n"));
326 tcp_listen_input(lpcb);
327 pbuf_free(p);
328 return;
329 }
330 }
331
332 #if TCP_INPUT_DEBUG
333 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
334 tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
335 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
336 #endif /* TCP_INPUT_DEBUG */
337
338
339 if (pcb != NULL) {
340 /* The incoming segment belongs to a connection. */
341 #if TCP_INPUT_DEBUG
342 tcp_debug_print_state(pcb->state);
343 #endif /* TCP_INPUT_DEBUG */
344
345 /* Set up a tcp_seg structure. */
346 inseg.next = NULL;
347 inseg.len = p->tot_len;
348 inseg.p = p;
349 inseg.tcphdr = tcphdr;
350
351 recv_data = NULL;
352 recv_flags = 0;
353 recv_acked = 0;
354
355 if (flags & TCP_PSH) {
356 p->flags |= PBUF_FLAG_PUSH;
357 }
358
359 /* If there is data which was previously "refused" by upper layer */
360 if (pcb->refused_data != NULL) {
361 if ((tcp_process_refused_data(pcb) == ERR_ABRT) ||
362 ((pcb->refused_data != NULL) && (tcplen > 0))) {
363 /* pcb has been aborted or refused data is still refused and the new
364 segment contains data */
365 TCP_STATS_INC(tcp.drop);
366 MIB2_STATS_INC(mib2.tcpinerrs);
367 goto aborted;
368 }
369 }
370 tcp_input_pcb = pcb;
371 err = tcp_process(pcb);
372 /* A return value of ERR_ABRT means that tcp_abort() was called
373 and that the pcb has been freed. If so, we don't do anything. */
374 if (err != ERR_ABRT) {
375 if (recv_flags & TF_RESET) {
376 /* TF_RESET means that the connection was reset by the other
377 end. We then call the error callback to inform the
378 application that the connection is dead before we
379 deallocate the PCB. */
380 TCP_EVENT_ERR(pcb->errf, pcb->callback_arg, ERR_RST);
381 tcp_pcb_remove(&tcp_active_pcbs, pcb);
382 memp_free(MEMP_TCP_PCB, pcb);
383 } else {
384 err = ERR_OK;
385 /* If the application has registered a "sent" function to be
386 called when new send buffer space is available, we call it
387 now. */
388 if (recv_acked > 0) {
389 u16_t acked16;
390 #if LWIP_WND_SCALE
391 /* recv_acked is u32_t but the sent callback only takes a u16_t,
392 so we might have to call it multiple times. */
393 u32_t acked = recv_acked;
394 while (acked > 0) {
395 acked16 = (u16_t)LWIP_MIN(acked, 0xffffu);
396 acked -= acked16;
397 #else
398 {
399 acked16 = recv_acked;
400 #endif
401 TCP_EVENT_SENT(pcb, (u16_t)acked16, err);
402 if (err == ERR_ABRT) {
403 goto aborted;
404 }
405 }
406 recv_acked = 0;
407 }
408 if (recv_flags & TF_CLOSED) {
409 /* The connection has been closed and we will deallocate the
410 PCB. */
411 if (!(pcb->flags & TF_RXCLOSED)) {
412 /* Connection closed although the application has only shut down the
413 tx side: call the PCB's err callback and indicate the closure to
414 ensure the application doesn't continue using the PCB. */
415 TCP_EVENT_ERR(pcb->errf, pcb->callback_arg, ERR_CLSD);
416 }
417 tcp_pcb_remove(&tcp_active_pcbs, pcb);
418 memp_free(MEMP_TCP_PCB, pcb);
419 goto aborted;
420 }
421 #if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
422 while (recv_data != NULL) {
423 struct pbuf *rest = NULL;
424 pbuf_split_64k(recv_data, &rest);
425 #else /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
426 if (recv_data != NULL) {
427 #endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
428
429 LWIP_ASSERT("pcb->refused_data == NULL", pcb->refused_data == NULL);
430 if (pcb->flags & TF_RXCLOSED) {
431 /* received data although already closed -> abort (send RST) to
432 notify the remote host that not all data has been processed */
433 pbuf_free(recv_data);
434 #if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
435 if (rest != NULL) {
436 pbuf_free(rest);
437 }
438 #endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
439 tcp_abort(pcb);
440 goto aborted;
441 }
442
443 /* Notify application that data has been received. */
444 TCP_EVENT_RECV(pcb, recv_data, ERR_OK, err);
445 if (err == ERR_ABRT) {
446 #if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
447 if (rest != NULL) {
448 pbuf_free(rest);
449 }
450 #endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
451 goto aborted;
452 }
453
454 /* If the upper layer can't receive this data, store it */
455 if (err != ERR_OK) {
456 #if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
457 if (rest != NULL) {
458 pbuf_cat(recv_data, rest);
459 }
460 #endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
461 pcb->refused_data = recv_data;
462 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: keep incoming packet, because pcb is \"full\"\n"));
463 #if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
464 break;
465 } else {
466 /* Upper layer received the data, go on with the rest if > 64K */
467 recv_data = rest;
468 #endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
469 }
470 }
471
472 /* If a FIN segment was received, we call the callback
473 function with a NULL buffer to indicate EOF. */
474 if (recv_flags & TF_GOT_FIN) {
475 if (pcb->refused_data != NULL) {
476 /* Delay this if we have refused data. */
477 pcb->refused_data->flags |= PBUF_FLAG_TCP_FIN;
478 } else {
479 /* correct rcv_wnd as the application won't call tcp_recved()
480 for the FIN's seqno */
481 if (pcb->rcv_wnd != TCP_WND_MAX(pcb)) {
482 pcb->rcv_wnd++;
483 }
484 TCP_EVENT_CLOSED(pcb, err);
485 if (err == ERR_ABRT) {
486 goto aborted;
487 }
488 }
489 }
490
491 tcp_input_pcb = NULL;
492 /* Try to send something out. */
493 tcp_output(pcb);
494 #if TCP_INPUT_DEBUG
495 #if TCP_DEBUG
496 tcp_debug_print_state(pcb->state);
497 #endif /* TCP_DEBUG */
498 #endif /* TCP_INPUT_DEBUG */
499 }
500 }
501 /* Jump target if pcb has been aborted in a callback (by calling tcp_abort()).
502 Below this line, 'pcb' may not be dereferenced! */
503 aborted:
504 tcp_input_pcb = NULL;
505 recv_data = NULL;
506
507 /* give up our reference to inseg.p */
508 if (inseg.p != NULL)
509 {
510 pbuf_free(inseg.p);
511 inseg.p = NULL;
512 }
513 } else {
514
515 /* If no matching PCB was found, send a TCP RST (reset) to the
516 sender. */
517 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n"));
518 if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
519 TCP_STATS_INC(tcp.proterr);
520 TCP_STATS_INC(tcp.drop);
521 tcp_rst(ackno, seqno + tcplen, ip_current_dest_addr(),
522 ip_current_src_addr(), tcphdr->dest, tcphdr->src);
523 }
524 pbuf_free(p);
525 }
526
527 LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
528 PERF_STOP("tcp_input");
529 return;
530 dropped:
531 TCP_STATS_INC(tcp.drop);
532 MIB2_STATS_INC(mib2.tcpinerrs);
533 pbuf_free(p);
534 }
535
536 /**
537 * Called by tcp_input() when a segment arrives for a listening
538 * connection (from tcp_input()).
539 *
540 * @param pcb the tcp_pcb_listen for which a segment arrived
541 *
542 * @note the segment which arrived is saved in global variables, therefore only the pcb
543 * involved is passed as a parameter to this function
544 */
545 static void
546 tcp_listen_input(struct tcp_pcb_listen *pcb)
547 {
548 struct tcp_pcb *npcb;
549 err_t rc;
550
551 if (flags & TCP_RST) {
552 /* An incoming RST should be ignored. Return. */
553 return;
554 }
555
556 /* In the LISTEN state, we check for incoming SYN segments,
557 creates a new PCB, and responds with a SYN|ACK. */
558 if (flags & TCP_ACK) {
559 /* For incoming segments with the ACK flag set, respond with a
560 RST. */
561 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
562 tcp_rst(ackno, seqno + tcplen, ip_current_dest_addr(),
563 ip_current_src_addr(), tcphdr->dest, tcphdr->src);
564 } else if (flags & TCP_SYN) {
565 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %"U16_F" -> %"U16_F".\n", tcphdr->src, tcphdr->dest));
566 #if TCP_LISTEN_BACKLOG
567 if (pcb->accepts_pending >= pcb->backlog) {
568 LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: listen backlog exceeded for port %"U16_F"\n", tcphdr->dest));
569 return;
570 }
571 #endif /* TCP_LISTEN_BACKLOG */
572 npcb = tcp_alloc(pcb->prio);
573 /* If a new PCB could not be created (probably due to lack of memory),
574 we don't do anything, but rely on the sender will retransmit the
575 SYN at a time when we have more memory available. */
576 if (npcb == NULL) {
577 err_t err;
578 LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: could not allocate PCB\n"));
579 TCP_STATS_INC(tcp.memerr);
580 TCP_EVENT_ACCEPT(pcb, NULL, pcb->callback_arg, ERR_MEM, err);
581 LWIP_UNUSED_ARG(err); /* err not useful here */
582 return;
583 }
584 #if TCP_LISTEN_BACKLOG
585 pcb->accepts_pending++;
586 npcb->flags |= TF_BACKLOGPEND;
587 #endif /* TCP_LISTEN_BACKLOG */
588 /* Set up the new PCB. */
589 ip_addr_copy(npcb->local_ip, *ip_current_dest_addr());
590 ip_addr_copy(npcb->remote_ip, *ip_current_src_addr());
591 npcb->local_port = pcb->local_port;
592 npcb->remote_port = tcphdr->src;
593 npcb->state = SYN_RCVD;
594 npcb->rcv_nxt = seqno + 1;
595 npcb->rcv_ann_right_edge = npcb->rcv_nxt;
596 npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */
597 npcb->callback_arg = pcb->callback_arg;
598 #if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG
599 npcb->listener = pcb;
600 #endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */
601 /* inherit socket options */
602 npcb->so_options = pcb->so_options & SOF_INHERITED;
603 /* Register the new PCB so that we can begin receiving segments
604 for it. */
605 TCP_REG_ACTIVE(npcb);
606
607 /* Parse any options in the SYN. */
608 tcp_parseopt(npcb);
609 npcb->snd_wnd = SND_WND_SCALE(npcb, tcphdr->wnd);
610 npcb->snd_wnd_max = npcb->snd_wnd;
611 npcb->ssthresh = LWIP_TCP_INITIAL_SSTHRESH(npcb);
612
613 /* Copy usr rcv wndow value to new pcb */
614 if(pcb->usr_rcv_wnd != 0) {
615 npcb->usr_rcv_wnd = pcb->usr_rcv_wnd;
616 npcb->rcv_wnd = npcb->rcv_ann_wnd = npcb->usr_rcv_wnd;
617 }
618
619
620 #if TCP_CALCULATE_EFF_SEND_MSS
621 npcb->mss = tcp_eff_send_mss(npcb->mss, &npcb->local_ip, &npcb->remote_ip);
622 #endif /* TCP_CALCULATE_EFF_SEND_MSS */
623
624 MIB2_STATS_INC(mib2.tcppassiveopens);
625
626 /* Send a SYN|ACK together with the MSS option. */
627 rc = tcp_enqueue_flags(npcb, TCP_SYN | TCP_ACK);
628 if (rc != ERR_OK) {
629 tcp_abandon(npcb, 0);
630 return;
631 }
632 tcp_output(npcb);
633 }
634 return;
635 }
636
637 /**
638 * Called by tcp_input() when a segment arrives for a connection in
639 * TIME_WAIT.
640 *
641 * @param pcb the tcp_pcb for which a segment arrived
642 *
643 * @note the segment which arrived is saved in global variables, therefore only the pcb
644 * involved is passed as a parameter to this function
645 */
646 static void
647 tcp_timewait_input(struct tcp_pcb *pcb)
648 {
649 /* RFC 1337: in TIME_WAIT, ignore RST and ACK FINs + any 'acceptable' segments */
650 /* RFC 793 3.9 Event Processing - Segment Arrives:
651 * - first check sequence number - we skip that one in TIME_WAIT (always
652 * acceptable since we only send ACKs)
653 * - second check the RST bit (... return) */
654 if (flags & TCP_RST) {
655 return;
656 }
657 /* - fourth, check the SYN bit, */
658 if (flags & TCP_SYN) {
659 /* If an incoming segment is not acceptable, an acknowledgment
660 should be sent in reply */
661 if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd)) {
662 /* If the SYN is in the window it is an error, send a reset */
663 tcp_rst(ackno, seqno + tcplen, ip_current_dest_addr(),
664 ip_current_src_addr(), tcphdr->dest, tcphdr->src);
665 return;
666 }
667 } else if (flags & TCP_FIN) {
668 /* - eighth, check the FIN bit: Remain in the TIME-WAIT state.
669 Restart the 2 MSL time-wait timeout.*/
670 pcb->tmr = tcp_ticks;
671 }
672
673 if ((tcplen > 0)) {
674 /* Acknowledge data, FIN or out-of-window SYN */
675 pcb->flags |= TF_ACK_NOW;
676 tcp_output(pcb);
677 }
678 return;
679 }
680
681 /**
682 * Implements the TCP state machine. Called by tcp_input. In some
683 * states tcp_receive() is called to receive data. The tcp_seg
684 * argument will be freed by the caller (tcp_input()) unless the
685 * recv_data pointer in the pcb is set.
686 *
687 * @param pcb the tcp_pcb for which a segment arrived
688 *
689 * @note the segment which arrived is saved in global variables, therefore only the pcb
690 * involved is passed as a parameter to this function
691 */
692 static err_t
693 tcp_process(struct tcp_pcb *pcb)
694 {
695 struct tcp_seg *rseg;
696 u8_t acceptable = 0;
697 err_t err;
698
699 err = ERR_OK;
700
701 /* Process incoming RST segments. */
702 if (flags & TCP_RST) {
703 /* First, determine if the reset is acceptable. */
704 if (pcb->state == SYN_SENT) {
705 /* "In the SYN-SENT state (a RST received in response to an initial SYN),
706 the RST is acceptable if the ACK field acknowledges the SYN." */
707 if (ackno == pcb->snd_nxt) {
708 acceptable = 1;
709 }
710 } else {
711 /* "In all states except SYN-SENT, all reset (RST) segments are validated
712 by checking their SEQ-fields." */
713 if (seqno == pcb->rcv_nxt) {
714 acceptable = 1;
715 } else if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt,
716 pcb->rcv_nxt + pcb->rcv_wnd)) {
717 /* If the sequence number is inside the window, we only send an ACK
718 and wait for a re-send with matching sequence number.
719 This violates RFC 793, but is required to protection against
720 CVE-2004-0230 (RST spoofing attack). */
721 tcp_ack_now(pcb);
722 }
723 }
724
725 if (acceptable) {
726 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: Connection RESET\n"));
727 LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb->state != CLOSED);
728 recv_flags |= TF_RESET;
729 pcb->flags &= ~TF_ACK_DELAY;
730 return ERR_RST;
731 } else {
732 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
733 seqno, pcb->rcv_nxt));
734 LWIP_DEBUGF(TCP_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
735 seqno, pcb->rcv_nxt));
736 return ERR_OK;
737 }
738 }
739
740 if ((flags & TCP_SYN) && (pcb->state != SYN_SENT && pcb->state != SYN_RCVD)) {
741 /* Cope with new connection attempt after remote end crashed */
742 tcp_ack_now(pcb);
743 return ERR_OK;
744 }
745
746 if ((pcb->flags & TF_RXCLOSED) == 0) {
747 /* Update the PCB (in)activity timer unless rx is closed (see tcp_shutdown) */
748 pcb->tmr = tcp_ticks;
749 }
750 pcb->keep_cnt_sent = 0;
751
752 tcp_parseopt(pcb);
753
754 /* Do different things depending on the TCP state. */
755 switch (pcb->state) {
756 case SYN_SENT:
757 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("SYN-SENT: ackno %"U32_F" pcb->snd_nxt %"U32_F" unacked %"U32_F"\n", ackno,
758 pcb->snd_nxt, lwip_ntohl(pcb->unacked->tcphdr->seqno)));
759 /* received SYN ACK with expected sequence number? */
760 if ((flags & TCP_ACK) && (flags & TCP_SYN)
761 && (ackno == pcb->lastack + 1)) {
762 pcb->rcv_nxt = seqno + 1;
763 pcb->rcv_ann_right_edge = pcb->rcv_nxt;
764 pcb->lastack = ackno;
765 pcb->snd_wnd = SND_WND_SCALE(pcb, tcphdr->wnd);
766 pcb->snd_wnd_max = pcb->snd_wnd;
767 pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */
768 pcb->state = ESTABLISHED;
769
770 #if TCP_CALCULATE_EFF_SEND_MSS
771 pcb->mss = tcp_eff_send_mss(pcb->mss, &pcb->local_ip, &pcb->remote_ip);
772 #endif /* TCP_CALCULATE_EFF_SEND_MSS */
773
774 /* Set ssthresh again after changing 'mss' and 'snd_wnd' */
775 pcb->ssthresh = LWIP_TCP_INITIAL_SSTHRESH(pcb);
776
777 pcb->cwnd = LWIP_TCP_CALC_INITIAL_CWND(pcb->mss);
778 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_process (SENT): cwnd %"TCPWNDSIZE_F
779 " ssthresh %"TCPWNDSIZE_F"\n",
780 pcb->cwnd, pcb->ssthresh));
781 LWIP_ASSERT("pcb->snd_queuelen > 0", (pcb->snd_queuelen > 0));
782 --pcb->snd_queuelen;
783 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_process: SYN-SENT --queuelen %"TCPWNDSIZE_F"\n", (tcpwnd_size_t)pcb->snd_queuelen));
784 rseg = pcb->unacked;
785 if (rseg == NULL) {
786 /* might happen if tcp_output fails in tcp_rexmit_rto()
787 in which case the segment is on the unsent list */
788 rseg = pcb->unsent;
789 LWIP_ASSERT("no segment to free", rseg != NULL);
790 pcb->unsent = rseg->next;
791 } else {
792 pcb->unacked = rseg->next;
793 }
794 tcp_seg_free(rseg);
795
796 /* If there's nothing left to acknowledge, stop the retransmit
797 timer, otherwise reset it to start again */
798 if (pcb->unacked == NULL) {
799 pcb->rtime = -1;
800 } else {
801 pcb->rtime = 0;
802 pcb->nrtx = 0;
803 }
804
805 /* Call the user specified function to call when successfully
806 * connected. */
807 TCP_EVENT_CONNECTED(pcb, ERR_OK, err);
808 if (err == ERR_ABRT) {
809 return ERR_ABRT;
810 }
811 tcp_ack_now(pcb);
812 }
813 /* received ACK? possibly a half-open connection */
814 else if (flags & TCP_ACK) {
815 /* send a RST to bring the other side in a non-synchronized state. */
816 tcp_rst(ackno, seqno + tcplen, ip_current_dest_addr(),
817 ip_current_src_addr(), tcphdr->dest, tcphdr->src);
818 /* Resend SYN immediately (don't wait for rto timeout) to establish
819 connection faster */
820 pcb->rtime = 0;
821 tcp_rexmit_rto(pcb);
822 }
823 break;
824 case SYN_RCVD:
825 if (flags & TCP_ACK) {
826 /* expected ACK number? */
827 if (TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_nxt)) {
828 pcb->state = ESTABLISHED;
829 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
830 #if LWIP_CALLBACK_API
831 LWIP_ASSERT("pcb->listener->accept != NULL",
832 (pcb->listener == NULL) || (pcb->listener->accept != NULL));
833 if (pcb->listener == NULL) {
834 /* listen pcb might be closed by now */
835 err = ERR_VAL;
836 } else
837 #endif
838 {
839 tcp_backlog_accepted(pcb);
840 /* Call the accept function. */
841 TCP_EVENT_ACCEPT(pcb->listener, pcb, pcb->callback_arg, ERR_OK, err);
842 }
843 if (err != ERR_OK) {
844 /* If the accept function returns with an error, we abort
845 * the connection. */
846 /* Already aborted? */
847 if (err != ERR_ABRT) {
848 tcp_abort(pcb);
849 }
850 return ERR_ABRT;
851 }
852 /* If there was any data contained within this ACK,
853 * we'd better pass it on to the application as well. */
854 tcp_receive(pcb);
855
856 /* passive open: update initial ssthresh now that the correct window is
857 known: if the remote side supports window scaling, the window sent
858 with the initial SYN can be smaller than the one used later */
859 pcb->ssthresh = LWIP_TCP_INITIAL_SSTHRESH(pcb);
860
861 /* Prevent ACK for SYN to generate a sent event */
862 if (recv_acked != 0) {
863 recv_acked--;
864 }
865
866 pcb->cwnd = LWIP_TCP_CALC_INITIAL_CWND(pcb->mss);
867 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_process (SYN_RCVD): cwnd %"TCPWNDSIZE_F
868 " ssthresh %"TCPWNDSIZE_F"\n",
869 pcb->cwnd, pcb->ssthresh));
870
871 if (recv_flags & TF_GOT_FIN) {
872 tcp_ack_now(pcb);
873 pcb->state = CLOSE_WAIT;
874 }
875 } else {
876 /* incorrect ACK number, send RST */
877 tcp_rst(ackno, seqno + tcplen, ip_current_dest_addr(),
878 ip_current_src_addr(), tcphdr->dest, tcphdr->src);
879 }
880 } else if ((flags & TCP_SYN) && (seqno == pcb->rcv_nxt - 1)) {
881 /* Looks like another copy of the SYN - retransmit our SYN-ACK */
882 tcp_rexmit(pcb);
883 }
884 break;
885 case CLOSE_WAIT:
886 /* FALLTHROUGH */
887 case ESTABLISHED:
888 tcp_receive(pcb);
889 if (recv_flags & TF_GOT_FIN) { /* passive close */
890 tcp_ack_now(pcb);
891 pcb->state = CLOSE_WAIT;
892 }
893 break;
894 case FIN_WAIT_1:
895 tcp_receive(pcb);
896 if (recv_flags & TF_GOT_FIN) {
897 if ((flags & TCP_ACK) && (ackno == pcb->snd_nxt) &&
898 pcb->unsent == NULL) {
899 LWIP_DEBUGF(TCP_DEBUG,
900 ("TCP connection closed: FIN_WAIT_1 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
901 tcp_ack_now(pcb);
902 tcp_pcb_purge(pcb);
903 TCP_RMV_ACTIVE(pcb);
904 pcb->state = TIME_WAIT;
905 TCP_REG(&tcp_tw_pcbs, pcb);
906 } else {
907 tcp_ack_now(pcb);
908 pcb->state = CLOSING;
909 }
910 } else if ((flags & TCP_ACK) && (ackno == pcb->snd_nxt) &&
911 pcb->unsent == NULL) {
912 pcb->state = FIN_WAIT_2;
913 }
914 break;
915 case FIN_WAIT_2:
916 tcp_receive(pcb);
917 if (recv_flags & TF_GOT_FIN) {
918 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: FIN_WAIT_2 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
919 tcp_ack_now(pcb);
920 tcp_pcb_purge(pcb);
921 TCP_RMV_ACTIVE(pcb);
922 pcb->state = TIME_WAIT;
923 TCP_REG(&tcp_tw_pcbs, pcb);
924 }
925 break;
926 case CLOSING:
927 tcp_receive(pcb);
928 if (flags & TCP_ACK && ackno == pcb->snd_nxt && pcb->unsent == NULL) {
929 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: CLOSING %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
930 tcp_pcb_purge(pcb);
931 TCP_RMV_ACTIVE(pcb);
932 pcb->state = TIME_WAIT;
933 TCP_REG(&tcp_tw_pcbs, pcb);
934 }
935 break;
936 case LAST_ACK:
937 tcp_receive(pcb);
938 if (flags & TCP_ACK && ackno == pcb->snd_nxt && pcb->unsent == NULL) {
939 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: LAST_ACK %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
940 /* bugfix #21699: don't set pcb->state to CLOSED here or we risk leaking segments */
941 recv_flags |= TF_CLOSED;
942 }
943 break;
944 default:
945 break;
946 }
947 return ERR_OK;
948 }
949
950 #if TCP_QUEUE_OOSEQ
951 /**
952 * Insert segment into the list (segments covered with new one will be deleted)
953 *
954 * Called from tcp_receive()
955 */
956 static void
957 tcp_oos_insert_segment(struct tcp_seg *cseg, struct tcp_seg *next)
958 {
959 struct tcp_seg *old_seg;
960
961 if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
962 /* received segment overlaps all following segments */
963 tcp_segs_free(next);
964 next = NULL;
965 } else {
966 /* delete some following segments
967 oos queue may have segments with FIN flag */
968 while (next &&
969 TCP_SEQ_GEQ((seqno + cseg->len),
970 (next->tcphdr->seqno + next->len))) {
971 /* cseg with FIN already processed */
972 if (TCPH_FLAGS(next->tcphdr) & TCP_FIN) {
973 TCPH_SET_FLAG(cseg->tcphdr, TCP_FIN);
974 }
975 old_seg = next;
976 next = next->next;
977 tcp_seg_free(old_seg);
978 }
979 if (next &&
980 TCP_SEQ_GT(seqno + cseg->len, next->tcphdr->seqno)) {
981 /* We need to trim the incoming segment. */
982 cseg->len = (u16_t)(next->tcphdr->seqno - seqno);
983 pbuf_realloc(cseg->p, cseg->len);
984 }
985 }
986 cseg->next = next;
987 }
988 #endif /* TCP_QUEUE_OOSEQ */
989
990 /** Remove segments from a list if the incoming ACK acknowledges them */
991 static struct tcp_seg *
992 tcp_free_acked_segments(struct tcp_pcb *pcb, struct tcp_seg *seg_list, const char *dbg_list_name,
993 struct tcp_seg *dbg_other_seg_list)
994 {
995 struct tcp_seg *next;
996 u16_t clen;
997
998 LWIP_UNUSED_ARG(dbg_list_name);
999 LWIP_UNUSED_ARG(dbg_other_seg_list);
1000
1001 while (seg_list != NULL &&
1002 TCP_SEQ_LEQ(lwip_ntohl(seg_list->tcphdr->seqno) +
1003 TCP_TCPLEN(seg_list), ackno)) {
1004 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %"U32_F":%"U32_F" from pcb->%s\n",
1005 lwip_ntohl(seg_list->tcphdr->seqno),
1006 lwip_ntohl(seg_list->tcphdr->seqno) + TCP_TCPLEN(seg_list),
1007 dbg_list_name));
1008
1009 next = seg_list;
1010 seg_list = seg_list->next;
1011
1012 clen = pbuf_clen(next->p);
1013 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %"TCPWNDSIZE_F" ... ",
1014 (tcpwnd_size_t)pcb->snd_queuelen));
1015 LWIP_ASSERT("pcb->snd_queuelen >= pbuf_clen(next->p)", (pcb->snd_queuelen >= clen));
1016
1017 pcb->snd_queuelen = (u16_t)(pcb->snd_queuelen - clen);
1018 recv_acked = (tcpwnd_size_t)(recv_acked + next->len);
1019 tcp_seg_free(next);
1020
1021 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%"TCPWNDSIZE_F" (after freeing %s)\n",
1022 (tcpwnd_size_t)pcb->snd_queuelen,
1023 dbg_list_name));
1024 if (pcb->snd_queuelen != 0) {
1025 LWIP_ASSERT("tcp_receive: valid queue length",
1026 seg_list != NULL || dbg_other_seg_list != NULL);
1027 }
1028 }
1029 return seg_list;
1030 }
1031
1032 /**
1033 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
1034 * data, and if so frees the memory of the buffered data. Next, it places the
1035 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
1036 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
1037 * it has been removed from the buffer.
1038 *
1039 * If the incoming segment constitutes an ACK for a segment that was used for RTT
1040 * estimation, the RTT is estimated here as well.
1041 *
1042 * Called from tcp_process().
1043 */
1044 static void
1045 tcp_receive(struct tcp_pcb *pcb)
1046 {
1047 struct tcp_seg *next;
1048 #if TCP_QUEUE_OOSEQ
1049 struct tcp_seg *prev, *cseg;
1050 #endif /* TCP_QUEUE_OOSEQ */
1051 s32_t off;
1052 s16_t m;
1053 u32_t right_wnd_edge;
1054 u16_t new_tot_len;
1055 int found_dupack = 0;
1056 #if TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS
1057 u32_t ooseq_blen;
1058 u16_t ooseq_qlen;
1059 #endif /* TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS */
1060
1061 LWIP_ASSERT("tcp_receive: wrong state", pcb->state >= ESTABLISHED);
1062
1063 if (flags & TCP_ACK) {
1064 right_wnd_edge = pcb->snd_wnd + pcb->snd_wl2;
1065
1066 /* Update window. */
1067 if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
1068 (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
1069 (pcb->snd_wl2 == ackno && (u32_t)SND_WND_SCALE(pcb, tcphdr->wnd) > pcb->snd_wnd)) {
1070 pcb->snd_wnd = SND_WND_SCALE(pcb, tcphdr->wnd);
1071 /* keep track of the biggest window announced by the remote host to calculate
1072 the maximum segment size */
1073 if (pcb->snd_wnd_max < pcb->snd_wnd) {
1074 pcb->snd_wnd_max = pcb->snd_wnd;
1075 }
1076 pcb->snd_wl1 = seqno;
1077 pcb->snd_wl2 = ackno;
1078 if (pcb->snd_wnd == 0) {
1079 if (pcb->persist_backoff == 0) {
1080 /* start persist timer */
1081 pcb->persist_cnt = 0;
1082 pcb->persist_backoff = 1;
1083 }
1084 } else if (pcb->persist_backoff > 0) {
1085 /* stop persist timer */
1086 pcb->persist_backoff = 0;
1087 }
1088 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: window update %"TCPWNDSIZE_F"\n", pcb->snd_wnd));
1089 #if TCP_WND_DEBUG
1090 } else {
1091 if (pcb->snd_wnd != (tcpwnd_size_t)SND_WND_SCALE(pcb, tcphdr->wnd)) {
1092 LWIP_DEBUGF(TCP_WND_DEBUG,
1093 ("tcp_receive: no window update lastack %"U32_F" ackno %"
1094 U32_F" wl1 %"U32_F" seqno %"U32_F" wl2 %"U32_F"\n",
1095 pcb->lastack, ackno, pcb->snd_wl1, seqno, pcb->snd_wl2));
1096 }
1097 #endif /* TCP_WND_DEBUG */
1098 }
1099
1100 /* (From Stevens TCP/IP Illustrated Vol II, p970.) Its only a
1101 * duplicate ack if:
1102 * 1) It doesn't ACK new data
1103 * 2) length of received packet is zero (i.e. no payload)
1104 * 3) the advertised window hasn't changed
1105 * 4) There is outstanding unacknowledged data (retransmission timer running)
1106 * 5) The ACK is == biggest ACK sequence number so far seen (snd_una)
1107 *
1108 * If it passes all five, should process as a dupack:
1109 * a) dupacks < 3: do nothing
1110 * b) dupacks == 3: fast retransmit
1111 * c) dupacks > 3: increase cwnd
1112 *
1113 * If it only passes 1-3, should reset dupack counter (and add to
1114 * stats, which we don't do in lwIP)
1115 *
1116 * If it only passes 1, should reset dupack counter
1117 *
1118 */
1119
1120 /* Clause 1 */
1121 if (TCP_SEQ_LEQ(ackno, pcb->lastack)) {
1122 /* Clause 2 */
1123 if (tcplen == 0) {
1124 /* Clause 3 */
1125 if (pcb->snd_wl2 + pcb->snd_wnd == right_wnd_edge) {
1126 /* Clause 4 */
1127 if (pcb->rtime >= 0) {
1128 /* Clause 5 */
1129 if (pcb->lastack == ackno) {
1130 found_dupack = 1;
1131 if ((u8_t)(pcb->dupacks + 1) > pcb->dupacks) {
1132 ++pcb->dupacks;
1133 }
1134 if (pcb->dupacks > 3) {
1135 /* Inflate the congestion window, but not if it means that
1136 the value overflows. */
1137 if ((tcpwnd_size_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
1138 pcb->cwnd += pcb->mss;
1139 }
1140 } else if (pcb->dupacks == 3) {
1141 /* Do fast retransmit */
1142 tcp_rexmit_fast(pcb);
1143 }
1144 }
1145 }
1146 }
1147 }
1148 /* If Clause (1) or more is true, but not a duplicate ack, reset
1149 * count of consecutive duplicate acks */
1150 if (!found_dupack) {
1151 pcb->dupacks = 0;
1152 }
1153 } else if (TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_nxt)) {
1154 /* We come here when the ACK acknowledges new data. */
1155
1156 /* Reset the "IN Fast Retransmit" flag, since we are no longer
1157 in fast retransmit. Also reset the congestion window to the
1158 slow start threshold. */
1159 if (pcb->flags & TF_INFR) {
1160 pcb->flags &= ~TF_INFR;
1161 pcb->cwnd = pcb->ssthresh;
1162 }
1163
1164 /* Reset the number of retransmissions. */
1165 pcb->nrtx = 0;
1166
1167 /* Reset the retransmission time-out. */
1168 pcb->rto = (pcb->sa >> 3) + pcb->sv;
1169
1170 /* Reset the fast retransmit variables. */
1171 pcb->dupacks = 0;
1172 pcb->lastack = ackno;
1173
1174 /* Update the congestion control variables (cwnd and
1175 ssthresh). */
1176 if (pcb->state >= ESTABLISHED) {
1177 if (pcb->cwnd < pcb->ssthresh) {
1178 if ((tcpwnd_size_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
1179 pcb->cwnd += pcb->mss;
1180 }
1181 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
1182 } else {
1183 tcpwnd_size_t new_cwnd = (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd);
1184 if (new_cwnd > pcb->cwnd) {
1185 pcb->cwnd = new_cwnd;
1186 }
1187 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: congestion avoidance cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
1188 }
1189 }
1190 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: ACK for %"U32_F", unacked->seqno %"U32_F":%"U32_F"\n",
1191 ackno,
1192 pcb->unacked != NULL?
1193 lwip_ntohl(pcb->unacked->tcphdr->seqno): 0,
1194 pcb->unacked != NULL?
1195 lwip_ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked): 0));
1196
1197 /* Remove segment from the unacknowledged list if the incoming
1198 ACK acknowledges them. */
1199 pcb->unacked = tcp_free_acked_segments(pcb, pcb->unacked, "unacked", pcb->unsent);
1200 /* We go through the ->unsent list to see if any of the segments
1201 on the list are acknowledged by the ACK. This may seem
1202 strange since an "unsent" segment shouldn't be acked. The
1203 rationale is that lwIP puts all outstanding segments on the
1204 ->unsent list after a retransmission, so these segments may
1205 in fact have been sent once. */
1206 pcb->unsent = tcp_free_acked_segments(pcb, pcb->unsent, "unsent", pcb->unacked);
1207
1208 /* If there's nothing left to acknowledge, stop the retransmit
1209 timer, otherwise reset it to start again */
1210 if (pcb->unacked == NULL) {
1211 pcb->rtime = -1;
1212 } else {
1213 pcb->rtime = 0;
1214 }
1215
1216 pcb->polltmr = 0;
1217
1218 #if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS
1219 if (ip_current_is_v6()) {
1220 /* Inform neighbor reachability of forward progress. */
1221 nd6_reachability_hint(ip6_current_src_addr());
1222 }
1223 #endif /* LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS*/
1224 } else {
1225 /* Out of sequence ACK, didn't really ack anything */
1226 tcp_send_empty_ack(pcb);
1227 }
1228
1229 /* We go through the ->unsent list to see if any of the segments
1230 on the list are acknowledged by the ACK. This may seem
1231 strange since an "unsent" segment shouldn't be acked. The
1232 rationale is that lwIP puts all outstanding segments on the
1233 ->unsent list after a retransmission, so these segments may
1234 in fact have been sent once. */
1235 while (pcb->unsent != NULL &&
1236 TCP_SEQ_BETWEEN(ackno, lwip_ntohl(pcb->unsent->tcphdr->seqno) +
1237 TCP_TCPLEN(pcb->unsent), pcb->snd_nxt)) {
1238 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %"U32_F":%"U32_F" from pcb->unsent\n",
1239 lwip_ntohl(pcb->unsent->tcphdr->seqno), lwip_ntohl(pcb->unsent->tcphdr->seqno) +
1240 TCP_TCPLEN(pcb->unsent)));
1241
1242 next = pcb->unsent;
1243 pcb->unsent = pcb->unsent->next;
1244 #if TCP_OVERSIZE
1245 if (pcb->unsent == NULL) {
1246 pcb->unsent_oversize = 0;
1247 }
1248 #endif /* TCP_OVERSIZE */
1249 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %"TCPWNDSIZE_F" ... ", (tcpwnd_size_t)pcb->snd_queuelen));
1250 LWIP_ASSERT("pcb->snd_queuelen >= pbuf_clen(next->p)", (pcb->snd_queuelen >= pbuf_clen(next->p)));
1251 /* Prevent ACK for FIN to generate a sent event */
1252 pcb->snd_queuelen -= pbuf_clen(next->p);
1253 recv_acked += next->len;
1254 tcp_seg_free(next);
1255 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%"TCPWNDSIZE_F" (after freeing unsent)\n", (tcpwnd_size_t)pcb->snd_queuelen));
1256 if (pcb->snd_queuelen != 0) {
1257 LWIP_ASSERT("tcp_receive: valid queue length",
1258 pcb->unacked != NULL || pcb->unsent != NULL);
1259 }
1260 }
1261 pcb->snd_buf += recv_acked;
1262 /* End of ACK for new data processing. */
1263
1264 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: pcb->rttest %"U32_F" rtseq %"U32_F" ackno %"U32_F"\n",
1265 pcb->rttest, pcb->rtseq, ackno));
1266
1267 /* RTT estimation calculations. This is done by checking if the
1268 incoming segment acknowledges the segment we use to take a
1269 round-trip time measurement. */
1270 if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
1271 /* diff between this shouldn't exceed 32K since this are tcp timer ticks
1272 and a round-trip shouldn't be that long... */
1273 m = (s16_t)(tcp_ticks - pcb->rttest);
1274
1275 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: experienced rtt %"U16_F" ticks (%"U16_F" msec).\n",
1276 m, (u16_t)(m * TCP_SLOW_INTERVAL)));
1277
1278 /* This is taken directly from VJs original code in his paper */
1279 m = m - (pcb->sa >> 3);
1280 pcb->sa += m;
1281 if (m < 0) {
1282 m = -m;
1283 }
1284 m = m - (pcb->sv >> 2);
1285 pcb->sv += m;
1286 pcb->rto = (pcb->sa >> 3) + pcb->sv;
1287
1288 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: RTO %"U16_F" (%"U16_F" milliseconds)\n",
1289 pcb->rto, (u16_t)(pcb->rto * TCP_SLOW_INTERVAL)));
1290
1291 pcb->rttest = 0;
1292 }
1293 }
1294
1295 /* If the incoming segment contains data, we must process it
1296 further unless the pcb already received a FIN.
1297 (RFC 793, chapter 3.9, "SEGMENT ARRIVES" in states CLOSE-WAIT, CLOSING,
1298 LAST-ACK and TIME-WAIT: "Ignore the segment text.") */
1299 if ((tcplen > 0) && (pcb->state < CLOSE_WAIT)) {
1300 /* This code basically does three things:
1301
1302 +) If the incoming segment contains data that is the next
1303 in-sequence data, this data is passed to the application. This
1304 might involve trimming the first edge of the data. The rcv_nxt
1305 variable and the advertised window are adjusted.
1306
1307 +) If the incoming segment has data that is above the next
1308 sequence number expected (->rcv_nxt), the segment is placed on
1309 the ->ooseq queue. This is done by finding the appropriate
1310 place in the ->ooseq queue (which is ordered by sequence
1311 number) and trim the segment in both ends if needed. An
1312 immediate ACK is sent to indicate that we received an
1313 out-of-sequence segment.
1314
1315 +) Finally, we check if the first segment on the ->ooseq queue
1316 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
1317 rcv_nxt > ooseq->seqno, we must trim the first edge of the
1318 segment on ->ooseq before we adjust rcv_nxt. The data in the
1319 segments that are now on sequence are chained onto the
1320 incoming segment so that we only need to call the application
1321 once.
1322 */
1323
1324 /* First, we check if we must trim the first edge. We have to do
1325 this if the sequence number of the incoming segment is less
1326 than rcv_nxt, and the sequence number plus the length of the
1327 segment is larger than rcv_nxt. */
1328 /* if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
1329 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
1330 if (TCP_SEQ_BETWEEN(pcb->rcv_nxt, seqno + 1, seqno + tcplen - 1)) {
1331 /* Trimming the first edge is done by pushing the payload
1332 pointer in the pbuf downwards. This is somewhat tricky since
1333 we do not want to discard the full contents of the pbuf up to
1334 the new starting point of the data since we have to keep the
1335 TCP header which is present in the first pbuf in the chain.
1336
1337 What is done is really quite a nasty hack: the first pbuf in
1338 the pbuf chain is pointed to by inseg.p. Since we need to be
1339 able to deallocate the whole pbuf, we cannot change this
1340 inseg.p pointer to point to any of the later pbufs in the
1341 chain. Instead, we point the ->payload pointer in the first
1342 pbuf to data in one of the later pbufs. We also set the
1343 inseg.data pointer to point to the right place. This way, the
1344 ->p pointer will still point to the first pbuf, but the
1345 ->p->payload pointer will point to data in another pbuf.
1346
1347 After we are done with adjusting the pbuf pointers we must
1348 adjust the ->data pointer in the seg and the segment
1349 length.*/
1350
1351 struct pbuf *p = inseg.p;
1352 off = pcb->rcv_nxt - seqno;
1353 LWIP_ASSERT("inseg.p != NULL", inseg.p);
1354 LWIP_ASSERT("insane offset!", (off < 0x7fff));
1355 if (inseg.p->len < off) {
1356 LWIP_ASSERT("pbuf too short!", (((s32_t)inseg.p->tot_len) >= off));
1357 new_tot_len = (u16_t)(inseg.p->tot_len - off);
1358 while (p->len < off) {
1359 off -= p->len;
1360 /* KJM following line changed (with addition of new_tot_len var)
1361 to fix bug #9076
1362 inseg.p->tot_len -= p->len; */
1363 p->tot_len = new_tot_len;
1364 p->len = 0;
1365 p = p->next;
1366 }
1367 if (pbuf_header(p, (s16_t)-off)) {
1368 /* Do we need to cope with this failing? Assert for now */
1369 LWIP_ASSERT("pbuf_header failed", 0);
1370 }
1371 } else {
1372 if (pbuf_header(inseg.p, (s16_t)-off)) {
1373 /* Do we need to cope with this failing? Assert for now */
1374 LWIP_ASSERT("pbuf_header failed", 0);
1375 }
1376 }
1377 inseg.len -= (u16_t)(pcb->rcv_nxt - seqno);
1378 inseg.tcphdr->seqno = seqno = pcb->rcv_nxt;
1379 }
1380 else {
1381 if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
1382 /* the whole segment is < rcv_nxt */
1383 /* must be a duplicate of a packet that has already been correctly handled */
1384
1385 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %"U32_F"\n", seqno));
1386 tcp_ack_now(pcb);
1387 }
1388 }
1389
1390 /* The sequence number must be within the window (above rcv_nxt
1391 and below rcv_nxt + rcv_wnd) in order to be further
1392 processed. */
1393 if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt,
1394 pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
1395 if (pcb->rcv_nxt == seqno) {
1396 /* The incoming segment is the next in sequence. We check if
1397 we have to trim the end of the segment and update rcv_nxt
1398 and pass the data to the application. */
1399 tcplen = TCP_TCPLEN(&inseg);
1400
1401 if (tcplen > pcb->rcv_wnd) {
1402 LWIP_DEBUGF(TCP_INPUT_DEBUG,
1403 ("tcp_receive: other end overran receive window"
1404 "seqno %"U32_F" len %"U16_F" right edge %"U32_F"\n",
1405 seqno, tcplen, pcb->rcv_nxt + pcb->rcv_wnd));
1406 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1407 /* Must remove the FIN from the header as we're trimming
1408 * that byte of sequence-space from the packet */
1409 TCPH_FLAGS_SET(inseg.tcphdr, TCPH_FLAGS(inseg.tcphdr) & ~(unsigned int)TCP_FIN);
1410 }
1411 /* Adjust length of segment to fit in the window. */
1412 TCPWND_CHECK16(pcb->rcv_wnd);
1413 inseg.len = (u16_t)pcb->rcv_wnd;
1414 if (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) {
1415 inseg.len -= 1;
1416 }
1417 pbuf_realloc(inseg.p, inseg.len);
1418 tcplen = TCP_TCPLEN(&inseg);
1419 LWIP_ASSERT("tcp_receive: segment not trimmed correctly to rcv_wnd\n",
1420 (seqno + tcplen) == (pcb->rcv_nxt + pcb->rcv_wnd));
1421 }
1422 #if TCP_QUEUE_OOSEQ
1423 /* Received in-sequence data, adjust ooseq data if:
1424 - FIN has been received or
1425 - inseq overlaps with ooseq */
1426 if (pcb->ooseq != NULL) {
1427 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1428 LWIP_DEBUGF(TCP_INPUT_DEBUG,
1429 ("tcp_receive: received in-order FIN, binning ooseq queue\n"));
1430 /* Received in-order FIN means anything that was received
1431 * out of order must now have been received in-order, so
1432 * bin the ooseq queue */
1433 while (pcb->ooseq != NULL) {
1434 struct tcp_seg *old_ooseq = pcb->ooseq;
1435 pcb->ooseq = pcb->ooseq->next;
1436 tcp_seg_free(old_ooseq);
1437 }
1438 } else {
1439 next = pcb->ooseq;
1440 /* Remove all segments on ooseq that are covered by inseg already.
1441 * FIN is copied from ooseq to inseg if present. */
1442 while (next &&
1443 TCP_SEQ_GEQ(seqno + tcplen,
1444 next->tcphdr->seqno + next->len)) {
1445 /* inseg cannot have FIN here (already processed above) */
1446 if (TCPH_FLAGS(next->tcphdr) & TCP_FIN &&
1447 (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) == 0) {
1448 TCPH_SET_FLAG(inseg.tcphdr, TCP_FIN);
1449 tcplen = TCP_TCPLEN(&inseg);
1450 }
1451 prev = next;
1452 next = next->next;
1453 tcp_seg_free(prev);
1454 }
1455 /* Now trim right side of inseg if it overlaps with the first
1456 * segment on ooseq */
1457 if (next &&
1458 TCP_SEQ_GT(seqno + tcplen,
1459 next->tcphdr->seqno)) {
1460 /* inseg cannot have FIN here (already processed above) */
1461 inseg.len = (u16_t)(next->tcphdr->seqno - seqno);
1462 if (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) {
1463 inseg.len -= 1;
1464 }
1465 pbuf_realloc(inseg.p, inseg.len);
1466 tcplen = TCP_TCPLEN(&inseg);
1467 LWIP_ASSERT("tcp_receive: segment not trimmed correctly to ooseq queue\n",
1468 (seqno + tcplen) == next->tcphdr->seqno);
1469 }
1470 pcb->ooseq = next;
1471 }
1472 }
1473 #endif /* TCP_QUEUE_OOSEQ */
1474
1475 pcb->rcv_nxt = seqno + tcplen;
1476
1477 /* Update the receiver's (our) window. */
1478 LWIP_ASSERT("tcp_receive: tcplen > rcv_wnd\n", pcb->rcv_wnd >= tcplen);
1479 pcb->rcv_wnd -= tcplen;
1480
1481 tcp_update_rcv_ann_wnd(pcb);
1482
1483 /* If there is data in the segment, we make preparations to
1484 pass this up to the application. The ->recv_data variable
1485 is used for holding the pbuf that goes to the
1486 application. The code for reassembling out-of-sequence data
1487 chains its data on this pbuf as well.
1488
1489 If the segment was a FIN, we set the TF_GOT_FIN flag that will
1490 be used to indicate to the application that the remote side has
1491 closed its end of the connection. */
1492 if (inseg.p->tot_len > 0) {
1493 recv_data = inseg.p;
1494 /* Since this pbuf now is the responsibility of the
1495 application, we delete our reference to it so that we won't
1496 (mistakingly) deallocate it. */
1497 inseg.p = NULL;
1498 }
1499 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1500 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: received FIN.\n"));
1501 recv_flags |= TF_GOT_FIN;
1502 }
1503
1504 #if TCP_QUEUE_OOSEQ
1505 /* We now check if we have segments on the ->ooseq queue that
1506 are now in sequence. */
1507 while (pcb->ooseq != NULL &&
1508 pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
1509
1510 cseg = pcb->ooseq;
1511 seqno = pcb->ooseq->tcphdr->seqno;
1512
1513 pcb->rcv_nxt += TCP_TCPLEN(cseg);
1514 LWIP_ASSERT("tcp_receive: ooseq tcplen > rcv_wnd\n",
1515 pcb->rcv_wnd >= TCP_TCPLEN(cseg));
1516 pcb->rcv_wnd -= TCP_TCPLEN(cseg);
1517
1518 tcp_update_rcv_ann_wnd(pcb);
1519
1520 if (cseg->p->tot_len > 0) {
1521 /* Chain this pbuf onto the pbuf that we will pass to
1522 the application. */
1523 /* With window scaling, this can overflow recv_data->tot_len, but
1524 that's not a problem since we explicitly fix that before passing
1525 recv_data to the application. */
1526 if (recv_data) {
1527 pbuf_cat(recv_data, cseg->p);
1528 } else {
1529 recv_data = cseg->p;
1530 }
1531 cseg->p = NULL;
1532 }
1533 if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
1534 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: dequeued FIN.\n"));
1535 recv_flags |= TF_GOT_FIN;
1536 if (pcb->state == ESTABLISHED) { /* force passive close or we can move to active close */
1537 pcb->state = CLOSE_WAIT;
1538 }
1539 }
1540
1541 pcb->ooseq = cseg->next;
1542 tcp_seg_free(cseg);
1543 }
1544 #if LWIP_TCP_SACK_OUT
1545 if (ip_get_option(pcb, SOF_TCPSACK) && (pcb->flags & TF_SACK)) {
1546 if (pcb->ooseq != NULL) {
1547 /* Some segments may have been removed from ooseq, let's remove all SACKs that
1548 describe anything before the new beginning of that list. */
1549 tcp_remove_sacks_lt(pcb, pcb->ooseq->tcphdr->seqno);
1550 } else if (LWIP_TCP_SACK_VALID(pcb, 0)) {
1551 /* ooseq has been cleared. Nothing to SACK */
1552 memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks));
1553 }
1554 }
1555 #endif /* LWIP_TCP_SACK_OUT */
1556 #endif /* TCP_QUEUE_OOSEQ */
1557
1558
1559 /* Acknowledge the segment(s). */
1560 tcp_ack(pcb);
1561
1562 #if LWIP_TCP_SACK_OUT
1563 if (ip_get_option(pcb, SOF_TCPSACK) && LWIP_TCP_SACK_VALID(pcb, 0)) {
1564 /* Normally the ACK for the data received could be piggy-backed on a data packet,
1565 but lwIP currently does not support including SACKs in data packets. So we force
1566 it to respond with an empty ACK packet (only if there is at least one SACK to be sent).
1567 NOTE: tcp_send_empty_ack() on success clears the ACK flags (set by tcp_ack()) */
1568 tcp_send_empty_ack(pcb);
1569 }
1570 #endif /* LWIP_TCP_SACK_OUT */
1571 #if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS
1572 if (ip_current_is_v6()) {
1573 /* Inform neighbor reachability of forward progress. */
1574 nd6_reachability_hint(ip6_current_src_addr());
1575 }
1576 #endif /* LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS*/
1577
1578 } else {
1579 /* We get here if the incoming segment is out-of-sequence. */
1580 tcp_send_empty_ack(pcb);
1581 #if TCP_QUEUE_OOSEQ
1582 /* We queue the segment on the ->ooseq queue. */
1583 if (pcb->ooseq == NULL) {
1584 pcb->ooseq = tcp_seg_copy(&inseg);
1585 #if LWIP_TCP_SACK_OUT
1586 if (ip_get_option(pcb, SOF_TCPSACK) && (pcb->flags & TF_SACK)) {
1587 /* All the SACKs should be invalid, so we can simply store the most recent one: */
1588 pcb->rcv_sacks[0].left = seqno;
1589 pcb->rcv_sacks[0].right = seqno + inseg.len;
1590 }
1591 #endif /* LWIP_TCP_SACK_OUT */
1592 } else {
1593 /* If the queue is not empty, we walk through the queue and
1594 try to find a place where the sequence number of the
1595 incoming segment is between the sequence numbers of the
1596 previous and the next segment on the ->ooseq queue. That is
1597 the place where we put the incoming segment. If needed, we
1598 trim the second edges of the previous and the incoming
1599 segment so that it will fit into the sequence.
1600
1601 If the incoming segment has the same sequence number as a
1602 segment on the ->ooseq queue, we discard the segment that
1603 contains less data. */
1604
1605 #if LWIP_TCP_SACK_OUT
1606 u32_t sackbeg = NULL;
1607 /* This is the left edge of the lowest possible SACK range.
1608 It may start before the newly received segment (possibly adjusted below). */
1609 if(ip_get_option(pcb, SOF_TCPSACK)) {
1610 sackbeg = TCP_SEQ_LT(seqno, pcb->ooseq->tcphdr->seqno) ? seqno : pcb->ooseq->tcphdr->seqno;
1611 }
1612 #endif /* LWIP_TCP_SACK_OUT */
1613
1614 prev = NULL;
1615 for (next = pcb->ooseq; next != NULL; next = next->next) {
1616 if (seqno == next->tcphdr->seqno) {
1617 /* The sequence number of the incoming segment is the
1618 same as the sequence number of the segment on
1619 ->ooseq. We check the lengths to see which one to
1620 discard. */
1621 if (inseg.len > next->len) {
1622 /* The incoming segment is larger than the old
1623 segment. We replace some segments with the new
1624 one. */
1625 cseg = tcp_seg_copy(&inseg);
1626 if (cseg != NULL) {
1627 if (prev != NULL) {
1628 prev->next = cseg;
1629 } else {
1630 pcb->ooseq = cseg;
1631 }
1632 tcp_oos_insert_segment(cseg, next);
1633 }
1634 break;
1635 } else {
1636 /* Either the lengths are the same or the incoming
1637 segment was smaller than the old one; in either
1638 case, we ditch the incoming segment. */
1639 break;
1640 }
1641 } else {
1642 if (prev == NULL) {
1643 if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
1644 /* The sequence number of the incoming segment is lower
1645 than the sequence number of the first segment on the
1646 queue. We put the incoming segment first on the
1647 queue. */
1648 cseg = tcp_seg_copy(&inseg);
1649 if (cseg != NULL) {
1650 pcb->ooseq = cseg;
1651 tcp_oos_insert_segment(cseg, next);
1652 }
1653 break;
1654 }
1655 } else {
1656 /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1657 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
1658 if (TCP_SEQ_BETWEEN(seqno, prev->tcphdr->seqno+1, next->tcphdr->seqno-1)) {
1659 /* The sequence number of the incoming segment is in
1660 between the sequence numbers of the previous and
1661 the next segment on ->ooseq. We trim trim the previous
1662 segment, delete next segments that included in received segment
1663 and trim received, if needed. */
1664 cseg = tcp_seg_copy(&inseg);
1665 if (cseg != NULL) {
1666 if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
1667 /* We need to trim the prev segment. */
1668 prev->len = (u16_t)(seqno - prev->tcphdr->seqno);
1669 pbuf_realloc(prev->p, prev->len);
1670 }
1671 prev->next = cseg;
1672 tcp_oos_insert_segment(cseg, next);
1673 }
1674 break;
1675 }
1676 }
1677 #if LWIP_TCP_SACK_OUT
1678 /* The new segment goes after the 'next' one. If there is a "hole" in sequence numbers
1679 between 'prev' and the beginning of 'next', we want to move sackbeg. */
1680 if (ip_get_option(pcb, SOF_TCPSACK) && prev != NULL && prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) {
1681 sackbeg = next->tcphdr->seqno;
1682 }
1683 #endif /* LWIP_TCP_SACK_OUT */
1684
1685 /* We don't use 'prev' below, so let's set it to current 'next'.
1686 This way even if we break the loop below, 'prev' will be pointing
1687 at the segment right in front of the newly added one. */
1688 prev = next;
1689 /* If the "next" segment is the last segment on the
1690 ooseq queue, we add the incoming segment to the end
1691 of the list. */
1692 if (next->next == NULL &&
1693 TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
1694 if (TCPH_FLAGS(next->tcphdr) & TCP_FIN) {
1695 /* segment "next" already contains all data */
1696 break;
1697 }
1698 next->next = tcp_seg_copy(&inseg);
1699 if (next->next != NULL) {
1700 if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
1701 /* We need to trim the last segment. */
1702 next->len = (u16_t)(seqno - next->tcphdr->seqno);
1703 pbuf_realloc(next->p, next->len);
1704 }
1705 /* check if the remote side overruns our receive window */
1706 if (TCP_SEQ_GT((u32_t)tcplen + seqno, pcb->rcv_nxt + (u32_t)pcb->rcv_wnd)) {
1707 LWIP_DEBUGF(TCP_INPUT_DEBUG,
1708 ("tcp_receive: other end overran receive window"
1709 "seqno %"U32_F" len %"U16_F" right edge %"U32_F"\n",
1710 seqno, tcplen, pcb->rcv_nxt + pcb->rcv_wnd));
1711 if (TCPH_FLAGS(next->next->tcphdr) & TCP_FIN) {
1712 /* Must remove the FIN from the header as we're trimming
1713 * that byte of sequence-space from the packet */
1714 TCPH_FLAGS_SET(next->next->tcphdr, TCPH_FLAGS(next->next->tcphdr) & ~TCP_FIN);
1715 }
1716 /* Adjust length of segment to fit in the window. */
1717 next->next->len = (u16_t)(pcb->rcv_nxt + pcb->rcv_wnd - seqno);
1718 pbuf_realloc(next->next->p, next->next->len);
1719 tcplen = TCP_TCPLEN(next->next);
1720 LWIP_ASSERT("tcp_receive: segment not trimmed correctly to rcv_wnd\n",
1721 (seqno + tcplen) == (pcb->rcv_nxt + pcb->rcv_wnd));
1722 }
1723 }
1724 break;
1725 }
1726 }
1727 }
1728
1729 #if LWIP_TCP_SACK_OUT
1730 if (ip_get_option(pcb, SOF_TCPSACK) && (pcb->flags & TF_SACK)) {
1731 if (prev == NULL) {
1732 /* The new segment is at the beginning. sackbeg should already be set properly.
1733 We need to find the right edge. */
1734 next = pcb->ooseq;
1735 } else if (prev->next != NULL) {
1736 /* The new segment was added after 'prev'. If there is a "hole" between 'prev' and 'prev->next',
1737 we need to move sackbeg. After that we should find the right edge. */
1738 next = prev->next;
1739 if (prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) {
1740 sackbeg = next->tcphdr->seqno;
1741 }
1742 } else {
1743 next = NULL;
1744 }
1745 if (next != NULL) {
1746 u32_t sackend = next->tcphdr->seqno;
1747 for ( ; (next != NULL) && (sackend == next->tcphdr->seqno); next = next->next) {
1748 sackend += next->len;
1749 }
1750 tcp_add_sack(pcb, sackbeg, sackend);
1751 }
1752 }
1753 #endif /* LWIP_TCP_SACK_OUT */
1754 }
1755 #if TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS
1756 /* Check that the data on ooseq doesn't exceed one of the limits
1757 and throw away everything above that limit. */
1758 ooseq_blen = 0;
1759 ooseq_qlen = 0;
1760 prev = NULL;
1761 for (next = pcb->ooseq; next != NULL; prev = next, next = next->next) {
1762 struct pbuf *p = next->p;
1763 ooseq_blen += p->tot_len;
1764 ooseq_qlen += pbuf_clen(p);
1765 if ((ooseq_blen > TCP_OOSEQ_MAX_BYTES) ||
1766 (ooseq_qlen > TCP_OOSEQ_MAX_PBUFS)) {
1767 /* too much ooseq data, dump this and everything after it */
1768 tcp_segs_free(next);
1769 if (prev == NULL) {
1770 /* first ooseq segment is too much, dump the whole queue */
1771 pcb->ooseq = NULL;
1772 } else {
1773 /* just dump 'next' and everything after it */
1774 prev->next = NULL;
1775 }
1776 break;
1777 }
1778 }
1779 #endif /* TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS */
1780 #endif /* TCP_QUEUE_OOSEQ */
1781 tcp_send_empty_ack(pcb);
1782 }
1783 } else {
1784 /* The incoming segment is not within the window. */
1785 tcp_send_empty_ack(pcb);
1786 }
1787 } else {
1788 /* Segments with length 0 is taken care of here. Segments that
1789 fall out of the window are ACKed. */
1790 if (!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
1791 tcp_ack_now(pcb);
1792 }
1793 }
1794 }
1795
1796 static u8_t
1797 tcp_getoptbyte(void)
1798 {
1799 if ((tcphdr_opt2 == NULL) || (tcp_optidx < tcphdr_opt1len)) {
1800 u8_t* opts = (u8_t *)tcphdr + TCP_HLEN;
1801 return opts[tcp_optidx++];
1802 } else {
1803 u8_t idx = (u8_t)(tcp_optidx++ - tcphdr_opt1len);
1804 return tcphdr_opt2[idx];
1805 }
1806 }
1807
1808 /**
1809 * Parses the options contained in the incoming segment.
1810 *
1811 * Called from tcp_listen_input() and tcp_process().
1812 * Currently, only the MSS option is supported!
1813 *
1814 * @param pcb the tcp_pcb for which a segment arrived
1815 */
1816 static void
1817 tcp_parseopt(struct tcp_pcb *pcb)
1818 {
1819 u8_t data;
1820 u16_t mss;
1821 #if LWIP_TCP_TIMESTAMPS
1822 u32_t tsval;
1823 #endif
1824
1825 /* Parse the TCP MSS option, if present. */
1826 if (tcphdr_optlen != 0) {
1827 for (tcp_optidx = 0; tcp_optidx < tcphdr_optlen; ) {
1828 u8_t opt = tcp_getoptbyte();
1829 switch (opt) {
1830 case LWIP_TCP_OPT_EOL:
1831 /* End of options. */
1832 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: EOL\n"));
1833 return;
1834 case LWIP_TCP_OPT_NOP:
1835 /* NOP option. */
1836 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: NOP\n"));
1837 break;
1838 case LWIP_TCP_OPT_MSS:
1839 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: MSS\n"));
1840 if (tcp_getoptbyte() != LWIP_TCP_OPT_LEN_MSS || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_MSS) > tcphdr_optlen) {
1841 /* Bad length */
1842 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1843 return;
1844 }
1845 /* An MSS option with the right option length. */
1846 mss = (tcp_getoptbyte() << 8);
1847 mss |= tcp_getoptbyte();
1848 /* Limit the mss to the configured TCP_MSS and prevent division by zero */
1849 pcb->mss = ((mss > TCP_MSS) || (mss == 0)) ? TCP_MSS : mss;
1850 break;
1851 #if LWIP_WND_SCALE
1852 case LWIP_TCP_OPT_WS:
1853 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: WND_SCALE\n"));
1854 if (tcp_getoptbyte() != LWIP_TCP_OPT_LEN_WS || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_WS) > tcphdr_optlen) {
1855 /* Bad length */
1856 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1857 return;
1858 }
1859 /* If syn was received with wnd scale option,
1860 activate wnd scale opt, but only if this is not a retransmission */
1861 if ((flags & TCP_SYN) && !(pcb->flags & TF_WND_SCALE)) {
1862 /* An WND_SCALE option with the right option length. */
1863 data = tcp_getoptbyte();
1864 pcb->snd_scale = data;
1865 if (pcb->snd_scale > 14U) {
1866 pcb->snd_scale = 14U;
1867 }
1868 pcb->rcv_scale = TCP_RCV_SCALE;
1869 pcb->flags |= TF_WND_SCALE;
1870
1871 if(pcb->usr_rcv_wnd == 0) {
1872 /* window scaling is enabled, we can use the full receive window */
1873 LWIP_ASSERT("window not at default value", pcb->rcv_wnd == TCPWND_MIN16(TCP_WND));
1874 LWIP_ASSERT("window not at default value", pcb->rcv_ann_wnd == TCPWND_MIN16(TCP_WND));
1875 pcb->rcv_wnd = pcb->rcv_ann_wnd = TCP_WND;
1876 }
1877 else {
1878 pcb->rcv_wnd = pcb->rcv_ann_wnd = pcb->usr_rcv_wnd;
1879 }
1880 }
1881 break;
1882 #endif
1883 #if LWIP_TCP_TIMESTAMPS
1884 case LWIP_TCP_OPT_TS:
1885 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: TS\n"));
1886 if (tcp_getoptbyte() != LWIP_TCP_OPT_LEN_TS || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_TS) > tcphdr_optlen) {
1887 /* Bad length */
1888 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1889 return;
1890 }
1891 /* TCP timestamp option with valid length */
1892 tsval = tcp_getoptbyte();
1893 tsval |= (tcp_getoptbyte() << 8);
1894 tsval |= (tcp_getoptbyte() << 16);
1895 tsval |= (tcp_getoptbyte() << 24);
1896 if (flags & TCP_SYN) {
1897 pcb->ts_recent = lwip_ntohl(tsval);
1898 /* Enable sending timestamps in every segment now that we know
1899 the remote host supports it. */
1900 pcb->flags |= TF_TIMESTAMP;
1901 } else if (TCP_SEQ_BETWEEN(pcb->ts_lastacksent, seqno, seqno+tcplen)) {
1902 pcb->ts_recent = lwip_ntohl(tsval);
1903 }
1904 /* Advance to next option (6 bytes already read) */
1905 tcp_optidx += LWIP_TCP_OPT_LEN_TS - 6;
1906 break;
1907 #endif
1908 #if LWIP_TCP_SACK_OUT
1909 case LWIP_TCP_OPT_SACK_PERM:
1910 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: SACK_PERM\n"));
1911
1912 if(!ip_get_option(pcb, SOF_TCPSACK)) {
1913 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: SACK_PERM is not enabled\n"));
1914 return;
1915 }
1916
1917 if (tcp_getoptbyte() != LWIP_TCP_OPT_LEN_SACK_PERM || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_SACK_PERM) > tcphdr_optlen) {
1918 /* Bad length */
1919 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1920 return;
1921 }
1922 /* TCP SACK_PERM option with valid length */
1923 if (flags & TCP_SYN) {
1924 /* We only set it if we receive it in a SYN (or SYN+ACK) packet */
1925 pcb->flags |= TF_SACK;
1926 }
1927 break;
1928 #endif /* LWIP_TCP_SACK_OUT */
1929 default:
1930 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: other\n"));
1931 data = tcp_getoptbyte();
1932 if (data < 2) {
1933 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1934 /* If the length field is zero, the options are malformed
1935 and we don't process them further. */
1936 return;
1937 }
1938 /* All other options have a length field, so that we easily
1939 can skip past them. */
1940 tcp_optidx += data - 2;
1941 }
1942 }
1943 }
1944 }
1945
1946 void
1947 tcp_trigger_input_pcb_close(void)
1948 {
1949 recv_flags |= TF_CLOSED;
1950 }
1951
1952 #if LWIP_TCP_SACK_OUT
1953 /**
1954 * Called by tcp_receive() to add new SACK entry.
1955 *
1956 * The new SACK entry will be placed at the beginning of rcv_sacks[], as the newest one.
1957 * Existing SACK entries will be "pushed back", to preserve their order.
1958 * This is the behavior described in RFC 2018, section 4.
1959 *
1960 * @param pcb the tcp_pcb for which a segment arrived
1961 * @param left the left side of the SACK (the first sequence number)
1962 * @param right the right side of the SACK (the first sequence number past this SACK)
1963 */
1964 static void
1965 tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right)
1966 {
1967 u8_t i;
1968 u8_t unused_idx;
1969
1970 if ((pcb->flags & TF_SACK) == 0 || !TCP_SEQ_LT(left, right)) {
1971 return;
1972 }
1973
1974 /* First, let's remove all SACKs that are no longer needed (because they overlap with the newest one),
1975 while moving all other SACKs forward.
1976 We run this loop for all entries, until we find the first invalid one.
1977 There is no point checking after that. */
1978 for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && LWIP_TCP_SACK_VALID(pcb, i); ++i) {
1979 /* We only want to use SACK at [i] if it doesn't overlap with left:right range.
1980 It does not overlap if its right side is before the newly added SACK,
1981 or if its left side is after the newly added SACK.
1982 NOTE: The equality should not really happen, but it doesn't hurt. */
1983 if (TCP_SEQ_LEQ(pcb->rcv_sacks[i].right, left) || TCP_SEQ_LEQ(right, pcb->rcv_sacks[i].left)) {
1984 if (unused_idx != i) {
1985 /* We don't need to copy if it's already in the right spot */
1986 pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i];
1987 }
1988 ++unused_idx;
1989 }
1990 }
1991
1992 /* Now 'unused_idx' is the index of the first invalid SACK entry,
1993 anywhere between 0 (no valid entries) and LWIP_TCP_MAX_SACK_NUM (all entries are valid).
1994 We want to clear this and all following SACKs.
1995 However, we will be adding another one in the front (and shifting everything else back).
1996 So let's just iterate from the back, and set each entry to the one to the left if it's valid,
1997 or to 0 if it is not. */
1998 for (i = LWIP_TCP_MAX_SACK_NUM - 1; i > 0; --i) {
1999 /* [i] is the index we are setting, and the value should be at index [i-1],
2000 or 0 if that index is unused (>= unused_idx). */
2001 if (i - 1 >= unused_idx) {
2002 /* [i-1] is unused. Let's clear [i]. */
2003 pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0;
2004 } else {
2005 pcb->rcv_sacks[i] = pcb->rcv_sacks[i - 1];
2006 }
2007 }
2008
2009 /* And now we can store the newest SACK */
2010 pcb->rcv_sacks[0].left = left;
2011 pcb->rcv_sacks[0].right = right;
2012 }
2013
2014 /**
2015 * Called to remove a range of SACKs.
2016 *
2017 * SACK entries will be removed or adjusted to not acknowledge any sequence
2018 * numbers that are less than 'seq' passed. It not only invalidates entries,
2019 * but also moves all entries that are still valid to the beginning.
2020 *
2021 * @param pcb the tcp_pcb to modify
2022 * @param seq the lowest sequence number to keep in SACK entries
2023 */
2024 static void
2025 tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq)
2026 {
2027 u8_t i;
2028 u8_t unused_idx;
2029
2030 /* We run this loop for all entries, until we find the first invalid one.
2031 There is no point checking after that. */
2032 for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && LWIP_TCP_SACK_VALID(pcb, i); ++i) {
2033 /* We only want to use SACK at index [i] if its right side is > 'seq'. */
2034 if (TCP_SEQ_GT(pcb->rcv_sacks[i].right, seq)) {
2035 if (unused_idx != i) {
2036 /* We only copy it if it's not in the right spot already. */
2037 pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i];
2038 }
2039 /* NOTE: It is possible that its left side is < 'seq', in which case we should adjust it. */
2040 if (TCP_SEQ_LT(pcb->rcv_sacks[unused_idx].left, seq)) {
2041 pcb->rcv_sacks[unused_idx].left = seq;
2042 }
2043 ++unused_idx;
2044 }
2045 }
2046
2047 /* We also need to invalidate everything from 'unused_idx' till the end */
2048 for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) {
2049 pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0;
2050 }
2051 }
2052 #endif /* LWIP_TCP_SACK_OUT */
2053 #endif /* LWIP_TCP */
2054