概述
tcp_rcv_established用于处理已连接状态下的输入,处理过程根据首部预测字段分为快速路径和慢速路径;
1. 在快路中,根据是否有数据负荷进行不同处理:
(1) 若无数据,则处理输入ack,释放该skb,检查是否有数据发送,有则发送;
(2) 若有数据,检查是否当前处理进程上下文,并且是期望读取的数据,若是则将数据复制到用户空间,若不满足直接复制到用户空间的情况,或者复制失败,则需要将数据段加入到接收队列中,加入方式包括合并到已有数据段,或者加入队列尾部,并唤醒用户进程通知有数据可读;
2. 在慢路中,会进行更详细的校验,然后处理ack,处理紧急数据,接收数据段,其中数据段可能包含乱序的情况,最后进行是否有数据和ack的发送检查;
源码分析
1 /* The first three cases are guaranteed by proper pred_flags setting,
2 * the rest is checked inline. Fast processing is turned on in
3 * tcp_data_queue when everything is OK.
4 */
/*
 * tcp_rcv_established - handle an incoming segment on an established socket.
 *
 * First attempts the header-prediction fast path (flag word matches
 * tp->pred_flags, seq == rcv_nxt, ack_seq not after snd_nxt). On the fast
 * path a pure ACK is fed to tcp_ack() and freed, while a data segment is
 * either copied straight to the reader or queued via tcp_queue_rcv().
 * Anything else goes through the slow path: checksum/length check,
 * tcp_validate_incoming(), tcp_ack(), tcp_urg() and tcp_data_queue().
 *
 * @sk:  socket the segment belongs to
 * @skb: the received buffer
 * @th:  TCP header of the segment
 * @len: length compared against the TCP header length below
 *       (presumably the segment length including the TCP header -
 *       TODO confirm against the caller)
 */
5 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
6 const struct tcphdr *th, unsigned int len)
7 {
8 struct tcp_sock *tp = tcp_sk(sk);
9
10 skb_mstamp_get(&tp->tcp_mstamp);
11 /* No cached receive dst on the socket: set it from this skb */
12 if (unlikely(!sk->sk_rx_dst))
13 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
14 /*
15 * Header prediction.
16 * The code loosely follows the one in the famous
17 * "30 instruction TCP receive" Van Jacobson mail.
18 *
19 * Van's trick is to deposit buffers into socket queue
20 * on a device interrupt, to call tcp_recv function
21 * on the receive process context and checksum and copy
22 * the buffer to user space. smart...
23 *
24 * Our current scheme is not silly either but we take the
25 * extra cost of the net_bh soft interrupt processing...
26 * We do checksum and copy also but from device to kernel.
27 */
28
29 tp->rx_opt.saw_tstamp = 0;
30
31 /* pred_flags is 0xS?10 << 16 + snd_wnd
32 * if header_prediction is to be made
33 * 'S' will always be tp->tcp_header_len >> 2
34 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to
35 * turn it off (when there are holes in the receive
36 * space for instance)
37 * PSH flag is ignored.
38 */
39
40 /* Fast-path test: flag word matches pred_flags && seq is exactly rcv_nxt && ack_seq is not after snd_nxt */
41 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
42 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
43 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
44 /* Expected TCP header length for this connection */
45 int tcp_header_len = tp->tcp_header_len;
46
47 /* Timestamp header prediction: tcp_header_len
48 * is automatically equal to th->doff*4 due to pred_flags
49 * match.
50 */
51
52 /* Check timestamp */
53 /* Header is exactly base header + aligned timestamp option */
54 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
55 /* No? Slow path! */
56 /* Could not parse the aligned timestamp option: take the slow path */
57 if (!tcp_parse_aligned_timestamp(tp, th))
58 goto slow_path;
59
60 /* If PAWS failed, check it more carefully in slow path */
61 /* Received tsval is older than ts_recent (signed 32-bit compare): take the slow path */
62 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
63 goto slow_path;
64
65 /* DO NOT update ts_recent here, if checksum fails
66 * and timestamp was corrupted part, it will result
67 * in a hung connection since we will drop all
68 * future packets due to the PAWS test.
69 */
70 }
71
72 /* No payload: len does not exceed the header length */
73 if (len <= tcp_header_len) {
74 /* Bulk data transfer: sender */
75 if (len == tcp_header_len) {
76 /* Predicted packet is in window by definition.
77 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
78 * Hence, check seq<=rcv_wup reduces to:
79 */
80 /*
81 Timestamp option present
82 && rcv_nxt == rcv_wup (everything received so far has been acknowledged)
83 -> store the timestamp as ts_recent
84 */
85 if (tcp_header_len ==
86 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
87 tp->rcv_nxt == tp->rcv_wup)
88 tcp_store_ts_recent(tp);
89
90 /* We know that such packets are checksummed
91 * on entry.
92 */
93 /* Process the incoming ACK */
94 tcp_ack(sk, skb, 0);
95 /* Pure ACK carries nothing to queue: free the skb */
96 __kfree_skb(skb);
97
98 /* Check whether there is pending data to send (and the send buffer state) */
99 tcp_data_snd_check(sk);
100 return;
101 }
102 /* len is smaller than the header length: malformed segment, count an error and drop */
103 else { /* Header too small */
104 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
105 goto discard;
106 }
107 }
108 /* Segment carries payload */
109 else {
110 int eaten = 0;
111 bool fragstolen = false;
112
113 /* We are running in the context of the reading task */
114 if (tp->ucopy.task == current &&
115 /* The reader has consumed everything up to rcv_nxt */
116 tp->copied_seq == tp->rcv_nxt &&
117 /* Payload fits in what the reader still wants (ucopy.len) */
118 len - tcp_header_len <= tp->ucopy.len &&
119 /* Socket is currently owned by the user context */
120 sock_owned_by_user(sk)) {
121
122 /* Mark the current task TASK_RUNNING before copying */
123 __set_current_state(TASK_RUNNING);
124
125 /* Copy the payload to the reader's iovec; zero return means success */
126 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
127 /* Predicted packet is in window by definition.
128 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
129 * Hence, check seq<=rcv_wup reduces to:
130 */
131 /* Timestamp option present && rcv_nxt == rcv_wup: update ts_recent */
132 if (tcp_header_len ==
133 (sizeof(struct tcphdr) +
134 TCPOLEN_TSTAMP_ALIGNED) &&
135 tp->rcv_nxt == tp->rcv_wup)
136 tcp_store_ts_recent(tp);
137
138 /* Receiver-side RTT estimation */
139 tcp_rcv_rtt_measure_ts(sk, skb);
140
141 __skb_pull(skb, tcp_header_len);
142
143 /* Advance the next expected receive sequence to this segment's end_seq */
144 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
145 NET_INC_STATS(sock_net(sk),
146 LINUX_MIB_TCPHPHITSTOUSER);
147 eaten = 1;
148 }
149 }
150
151 /* Direct copy was not attempted, or it failed */
152 if (!eaten) {
153 /* Verify the checksum */
154 if (tcp_checksum_complete(skb))
155 goto csum_error;
156
157 /* skb truesize exceeds the socket's forward-allocated memory: continue at step5 (slow-path processing) */
158 if ((int)skb->truesize > sk->sk_forward_alloc)
159 goto step5;
160
161 /* Predicted packet is in window by definition.
162 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
163 * Hence, check seq<=rcv_wup reduces to:
164 */
165 /* Timestamp option present && rcv_nxt == rcv_wup: update ts_recent */
166 if (tcp_header_len ==
167 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
168 tp->rcv_nxt == tp->rcv_wup)
169 tcp_store_ts_recent(tp);
170
171 /* Receiver-side RTT estimation */
172 tcp_rcv_rtt_measure_ts(sk, skb);
173
174 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
175
176 /* Bulk data transfer: receiver */
177 /* Queue the data on the socket; the return value tells whether the skb was consumed */
178 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
179 &fragstolen);
180 }
181
182 tcp_event_data_recv(sk, skb);
183
184 /* ack_seq differs from snd_una, so the ACK needs processing */
185 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
186 /* Well, only one small jumplet in fast path... */
187 /* Process the ACK */
188 tcp_ack(sk, skb, FLAG_DATA);
189 /* Check whether there is data to send, and send it if so */
190 tcp_data_snd_check(sk);
191 /* No ACK is scheduled: skip the ACK send check */
192 if (!inet_csk_ack_scheduled(sk))
193 goto no_ack;
194 }
195
196 /* Check whether an ACK must be sent, and send it if so */
197 __tcp_ack_snd_check(sk, 0);
198 no_ack:
199 /* skb was consumed (copied to the reader above, or reported so by tcp_queue_rcv()): release it */
200 if (eaten)
201 kfree_skb_partial(skb, fragstolen);
202
203 /* Notify the socket owner that data is ready to read */
204 sk->sk_data_ready(sk);
205 return;
206 }
207 }
208
209 slow_path:
210 /* len shorter than the header's data offset, or checksum failure */
211 if (len < (th->doff << 2) || tcp_checksum_complete(skb))
212 goto csum_error;
213
214 /* None of ACK, RST or SYN is set: drop the segment */
215 if (!th->ack && !th->rst && !th->syn)
216 goto discard;
217
218 /*
219 * Standard slow path.
220 */
221 /* Full validation of the incoming segment; on failure just return (the skb is not touched here afterwards) */
222 if (!tcp_validate_incoming(sk, skb, th, 1))
223 return;
224
225 step5:
226 /* Process the ACK; a negative return means the segment is dropped */
227 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
228 goto discard;
229
230 /* Receiver-side RTT estimation */
231 tcp_rcv_rtt_measure_ts(sk, skb);
232
233 /* Process urgent data. */
234 /* Handle urgent data */
235 tcp_urg(sk, skb, th);
236
237 /* step 7: process the segment text */
238 /* Hand the segment payload to tcp_data_queue() */
239 tcp_data_queue(sk, skb);
240
241 /* Check whether there is data to send, and send it if so */
242 tcp_data_snd_check(sk);
243
244 /* Check whether an ACK must be sent, and send it if so */
245 tcp_ack_snd_check(sk);
246 return;
247
248 csum_error:
249 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
250 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
251
252 discard:
253 tcp_drop(sk, skb);
254 }