概述
在主动关闭方发送了FIN之后,进入FIN_WAIT_1状态,在此状态收到了ACK,则进入FIN_WAIT_2状态,而FIN_WAIT_2后续要做的工作是等待接收对端发过来的FIN包,并且发送ACK,进而进入到TIME_WAIT状态;本文主要关注从FIN_WAIT_1进入FIN_WAIT_2状态,以及在FIN_WAIT_2状态来包或者定时器触发后的处理流程;
进入FIN_WAIT_2
tcp_rcv_state_process函数中对于ack的处理步骤中,假如连接处于FIN_WAIT_1,且数据均已经被确认完,则进入FIN_WAIT_2状态;此时如果套接口还不是DEAD状态,则只唤醒等待进程,不做后续处理;否则,如果无需在该状态等待(linger2<0),或者收到了乱序数据段,则直接关闭连接;如果需要等待,则需要判断等待时间与TIMEWAIT时间的大小关系,若>TIMEWAIT_LEN,则添加FIN_WAIT_2定时器;若当前段带有FIN标记或者套接口被用户进程锁定,同样添加FIN_WAIT_2定时器;否则直接进入TIME_WAIT接管(其子状态仍然是FIN_WAIT_2),接管之后会添加TIME_WAIT定时器;
另,tcp_close函数调用时,如果当前状态是FIN_WAIT_2也会用相似方式进入TIME_WAIT接管,不再单独介绍;
/* Excerpt of tcp_rcv_state_process(): the ACK-check step for a socket in TCP_FIN_WAIT1.
 * Once all sent data is acknowledged the socket moves to TCP_FIN_WAIT2. If the socket is
 * not SOCK_DEAD only the waiters are notified; otherwise it is closed, kept under the
 * keepalive timer, or handed over to tcp_time_wait().
 */
1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
2 {
3 /* step 5: check the ACK field */
4 acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5 FLAG_UPDATE_TS_RECENT) > 0;
6
7 switch (sk->sk_state) {
8 case TCP_FIN_WAIT1: {
9 int tmo;
10
11 /* If we enter the TCP_FIN_WAIT1 state and we are a
12 * Fast Open socket and this is the first acceptable
13 * ACK we have received, this would have acknowledged
14 * our SYNACK so stop the SYNACK timer.
15 */
16 if (req) {
17 /* Return RST if ack_seq is invalid.
18 * Note that RFC793 only says to generate a
19 * DUPACK for it but for TCP Fast Open it seems
20 * better to treat this case like TCP_SYN_RECV
21 * above.
22 */
23 if (!acceptable)
24 return 1;
25 /* We no longer need the request sock. */
26 reqsk_fastopen_remove(sk, req, false);
27 tcp_rearm_rto(sk);
28 }
29
30 /* Sent data is not fully acknowledged yet (snd_una != write_seq): leave the state unchanged */
31 if (tp->snd_una != tp->write_seq)
32 break;
33
34 /* Move to FIN_WAIT_2 */
35 tcp_set_state(sk, TCP_FIN_WAIT2);
36
37 /* Mark the send direction as shut down */
38 sk->sk_shutdown |= SEND_SHUTDOWN;
39
40 /* Confirm the cached route (dst) */
41 sk_dst_confirm(sk);
42
43 /* Socket is not SOCK_DEAD: report the state change to wake up waiters, then stop here */
44 if (!sock_flag(sk, SOCK_DEAD)) {
45 /* Wake up lingering close() */
46 sk->sk_state_change(sk);
47 break;
48 }
49
50 /* linger2 < 0: do not wait in FIN_WAIT_2 */
51 if (tp->linger2 < 0) {
52 /* Close the connection */
53 tcp_done(sk);
54 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
55 return 1;
56 }
57
58 /* Segment occupies sequence space and, not counting its FIN flag, ends after rcv_nxt */
59 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
60 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
61 /* Receive out of order FIN after close() */
62 if (tp->syn_fastopen && th->fin)
63 tcp_fastopen_active_disable(sk);
64 /* Close the connection */
65 tcp_done(sk);
66 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
67 return 1;
68 }
69
70 /* Get the FIN_WAIT_2 wait time */
71 tmo = tcp_fin_time(sk);
72
73 /* tmo > TCP_TIMEWAIT_LEN: arm the FIN_WAIT_2 (keepalive) timer for the part exceeding TCP_TIMEWAIT_LEN */
74 if (tmo > TCP_TIMEWAIT_LEN) {
75 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
76 }
77 /* This segment has FIN set, or the socket is owned by the user: arm the FIN_WAIT_2 timer with the full tmo */
78 else if (th->fin || sock_owned_by_user(sk)) {
79 /* Bad case. We could lose such FIN otherwise.
80 * It is not a big problem, but it looks confusing
81 * and not so rare event. We still can lose it now,
82 * if it spins in bh_lock_sock(), but it is really
83 * marginal case.
84 */
85 inet_csk_reset_keepalive_timer(sk, tmo);
86 }
87 /* Otherwise (tmo <= TCP_TIMEWAIT_LEN): hand over to tcp_time_wait() with state TCP_FIN_WAIT2 */
88 else {
89 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
90 goto discard;
91 }
92 break;
93 }
94 }
状态转换触发
FIN_WAIT_2状态的走向有以下几个流程触发点,(1)FIN_WAIT_2定时器未超时时间内,收到数据段触发; (2)FIN_WAIT_2定时器超时触发; (3)TIME_WAIT定时器未超时时间内,收到数据段触发; (4)TIME_WAIT定时器超时触发;
(1) FIN_WAIT_2定时器未超时时间内,收到数据段触发,如果设置FIN标记,则直接进入TIME_WAIT状态;
在函数tcp_rcv_state_process处理数据段的过程中,FIN_WAIT_2状态最终会调用tcp_data_queue来处理数据段;
/* Excerpt of tcp_rcv_state_process(): the segment-text step. In FIN_WAIT1/FIN_WAIT2 the
 * segment is passed to tcp_data_queue() unless receive is shut down and it carries data
 * beyond rcv_nxt, in which case the connection is reset.
 */
1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
2 {
3 /* step 7: process the segment text */
4 switch (sk->sk_state) {
5 case TCP_CLOSE_WAIT:
6 case TCP_CLOSING:
7 case TCP_LAST_ACK:
8 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
9 break;
10 case TCP_FIN_WAIT1:
11 case TCP_FIN_WAIT2:
12 /* RFC 793 says to queue data in these states,
13 * RFC 1122 says we MUST send a reset.
14 * BSD 4.4 also does reset.
15 */
16 if (sk->sk_shutdown & RCV_SHUTDOWN) {
17 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
18 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
19 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
20 tcp_reset(sk);
21 return 1;
22 }
23 }
24 /* Fall through */
25 case TCP_ESTABLISHED:
26 tcp_data_queue(sk, skb);
27 queued = 1;
28 break;
29 }
30 }
tcp_data_queue在处理数据段的时候,有对FIN标记的检查,如果有该标记,则进入tcp_fin函数;
/* Excerpt of tcp_data_queue(): only the FIN check is shown; a segment with TCPHDR_FIN set is passed on to tcp_fin(). */
1 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
2 {
3 /* ... */
4 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
5 tcp_fin(sk);
6 /* ... */
7 }
tcp_fin函数中,如果此时连接状态为FIN_WAIT_2,则发送ACK,并且直接进入TIME_WAIT状态;在tcp_time_wait函数处理中,会删除当前控制块,所以FIN_WAIT_2定时器也就不存在了;
/* Excerpt of tcp_fin(): only the TCP_FIN_WAIT2 case is shown; it sends an ACK and calls tcp_time_wait() with state TCP_TIME_WAIT. */
1 void tcp_fin(struct sock *sk)
2 {
3 /* ... */
4 switch (sk->sk_state) {
5 case TCP_FIN_WAIT2:
6 /* Received a FIN -- send ACK and enter TIME_WAIT. */
7 tcp_send_ack(sk);
8 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
9 break;
10 }
11 /* ... */
12 }
(2)FIN_WAIT_2定时器超时触发,如果linger2<0,或者等待时间<=TIMEWAIT_LEN,直接发送reset关闭连接;如果linger2>=0,且等待时间>TIMEWAIT_LEN,则进入TIME_WAIT接管;
/* Excerpt of tcp_keepalive_timer(): the branch taken when the timer fires for a SOCK_DEAD socket in TCP_FIN_WAIT2. */
1 static void tcp_keepalive_timer (unsigned long data)
2 {
3 /*...*/
4 /* In FIN_WAIT2 with SOCK_DEAD set: the keepalive timer is acting as the FIN_WAIT_2 timer */
5 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
6
7 /* linger2 >= 0: waiting in FIN_WAIT_2 is allowed */
8 if (tp->linger2 >= 0) {
9 /* Difference between the FIN_WAIT_2 wait time and TCP_TIMEWAIT_LEN */
10 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
11
12 /* Positive difference: hand over to tcp_time_wait() with state TCP_FIN_WAIT2 for tmo */
13 if (tmo > 0) {
14 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
15 goto out;
16 }
17 }
18
19 /* Send a RST */
20 tcp_send_active_reset(sk, GFP_ATOMIC);
21 goto death;
22 }
23 /*...*/
24 }
(3)TIME_WAIT定时器未超时时间内,收到数据段触发,若收到合法的FIN,则进入真正的TIME_WAIT状态;
tcp_v4_rcv收入数据段过程中,会对TIME_WAIT状态做特别处理,而对于TIME_WAIT子状态的处理在函数tcp_timewait_state_process中;
/* Excerpt of tcp_v4_rcv(): the do_time_wait path, which dispatches on the result of tcp_timewait_state_process(). */
1 int tcp_v4_rcv(struct sk_buff *skb)
2 {
3 /*...*/
4 do_time_wait:
5 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
6 inet_twsk_put(inet_twsk(sk));
7 goto discard_it;
8 }
9
10 /* Checksum error */
11 if (tcp_checksum_complete(skb)) {
12 inet_twsk_put(inet_twsk(sk));
13 goto csum_error;
14 }
15
16 /* Process the incoming segment against the timewait sock */
17 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
18
19 /* Received a SYN */
20 case TCP_TW_SYN: {
21 /* Look up a listening socket */
22 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
23 &tcp_hashinfo, skb,
24 __tcp_hdrlen(th),
25 iph->saddr, th->source,
26 iph->daddr, th->dest,
27 inet_iif(skb));
28
29 /* Found one */
30 if (sk2) {
31 /* Deschedule and release the tw sock */
32 inet_twsk_deschedule_put(inet_twsk(sk));
33 /* Continue with the listening socket */
34 sk = sk2;
35 refcounted = false;
36
37 /* Process the segment as a new connection request */
38 goto process;
39 }
40 /* Fall through to ACK */
41 }
42
43 /* Send an ACK */
44 case TCP_TW_ACK:
45 tcp_v4_timewait_ack(sk, skb);
46 break;
47 /* Send a RST */
48 case TCP_TW_RST:
49 tcp_v4_send_reset(sk, skb);
50 /* Deschedule and release the tw sock */
51 inet_twsk_deschedule_put(inet_twsk(sk));
52 goto discard_it;
53 /* Success */
54 case TCP_TW_SUCCESS:;
55 }
56 goto discard_it;
57 }
tcp_timewait_state_process函数处理流程中,如果TIME_WAIT的子状态为FIN_WAIT_2,并且收到了合法的FIN之后,会进入真正的TIME_WAIT状态,即子状态也为TIME_WAIT,并且设置TIME_WAIT定时器;
/* Excerpt of tcp_timewait_state_process(): handling of a timewait sock whose substate is TCP_FIN_WAIT2.
 * A valid FIN switches the substate to TCP_TIME_WAIT and reschedules the tw timer for TCP_TIMEWAIT_LEN.
 */
1 enum tcp_tw_status
2 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
3 const struct tcphdr *th)
4 {
5 /*...*/
6 /* Substate is FIN_WAIT2 */
7 if (tw->tw_substate == TCP_FIN_WAIT2) {
8 /* Just repeat all the checks of tcp_rcv_state_process() */
9
10 /* Out of window, send ACK */
11 /* paws_reject is set, or the segment is outside the receive window: return the rate-limited out-of-window result */
12 if (paws_reject ||
13 !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
14 tcptw->tw_rcv_nxt,
15 tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
16 return tcp_timewait_check_oow_rate_limit(
17 tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
18
19 /* RST: jump to the kill label (not shown here; presumably deschedules and destroys the tw sock) */
20 if (th->rst)
21 goto kill;
22
23 /* SYN whose sequence number is not before tw_rcv_nxt: answer with a RST */
24 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
25 return TCP_TW_RST;
26
27 /* Dup ACK? */
28 /* No ACK flag, or nothing past tw_rcv_nxt, or a segment occupying no sequence space */
29 if (!th->ack ||
30 !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
31 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
32 inet_twsk_put(tw);
33 return TCP_TW_SUCCESS;
34 }
35
36 /* New data or FIN. If new data arrive after half-duplex close,
37 * reset.
38 */
39 /* Not a FIN, or the FIN does not end exactly at tw_rcv_nxt + 1 (e.g. it carries data) */
40 if (!th->fin ||
41 TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
42 return TCP_TW_RST;
43
44 /* FIN arrived, enter true time-wait state. */
45 /* FIN: switch the substate to the real TIME_WAIT */
46 tw->tw_substate = TCP_TIME_WAIT;
47
48 /* Update the next expected receive sequence number */
49 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
50
51 /* Record the timestamp when the segment carried one */
52 if (tmp_opt.saw_tstamp) {
53 tcptw->tw_ts_recent_stamp = get_seconds();
54 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
55 }
56
57 /* Re-arm the tw timer */
58 inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
59
60 /* Tell the caller to send an ACK */
61 return TCP_TW_ACK;
62 }
63 /*...*/
64 }
(4)TIME_WAIT定时器超时触发:定时器超时后,将tw控制块从ehash和bhash中删除;此后再收到该连接的数据段,会发送reset;
定时器超时会进入到tw_timer_handler处理函数,该函数在统计信息之后,调用inet_twsk_kill;
/* Timer callback for a timewait sock: bumps one MIB counter, then calls inet_twsk_kill() on the tw sock. */
1 static void tw_timer_handler(unsigned long data)
2 {
3 struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
4 /* Count the expiry under TIMEWAITKILLED when tw_kill is set, otherwise under TIMEWAITED. */
5 if (tw->tw_kill)
6 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
7 else
8 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
9 inet_twsk_kill(tw);
10 }
inet_twsk_kill从ehash和bhash中把tw控制块删除,并且释放之;
/* Removes a timewait sock from its hash-chain node and from its bind bucket, decrements the tw_count counter, and calls inet_twsk_put(). */
1 static void inet_twsk_kill(struct inet_timewait_sock *tw)
2 {
3 struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
4 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
5 struct inet_bind_hashbucket *bhead;
6 /* Unlink the tw sock's node while holding the lock obtained from inet_ehash_lockp(). */
7 spin_lock(lock);
8 sk_nulls_del_node_init_rcu((struct sock *)tw);
9 spin_unlock(lock);
10
11 /* Disassociate with bind bucket. */
12 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
13 hashinfo->bhash_size)];
14 /* The bind-bucket unhash runs under that bucket's own lock. */
15 spin_lock(&bhead->lock);
16 inet_twsk_bind_unhash(tw, hashinfo);
17 spin_unlock(&bhead->lock);
18 /* Decrement tw_dr->tw_count, then call inet_twsk_put() (presumably drops a reference on the tw sock; confirm). */
19 atomic_dec(&tw->tw_dr->tw_count);
20 inet_twsk_put(tw);
21 }