1、TCP协议栈从上到下提供的接口

2、三次握手
结构体变量 struct proto tcp_prot 指定了TCP协议栈的访问接口函数:
1 struct proto tcp_prot = {
2 .name = "TCP",
3 .owner = THIS_MODULE,
4 .close = tcp_close,
5 .pre_connect = tcp_v4_pre_connect,
6 .connect = tcp_v4_connect,
7 .disconnect = tcp_disconnect,
8 .accept = inet_csk_accept,
9 .ioctl = tcp_ioctl,
10 .init = tcp_v4_init_sock,
11 .destroy = tcp_v4_destroy_sock,
12 .shutdown = tcp_shutdown,
13 .setsockopt = tcp_setsockopt,
14 .getsockopt = tcp_getsockopt,
15 .keepalive = tcp_set_keepalive,
16 .recvmsg = tcp_recvmsg,
17 .sendmsg = tcp_sendmsg,
18 .sendpage = tcp_sendpage,
19 .backlog_rcv = tcp_v4_do_rcv,
20 .release_cb = tcp_release_cb,
21 .hash = inet_hash,
22 .unhash = inet_unhash,
23 .get_port = inet_csk_get_port,
24 .enter_memory_pressure = tcp_enter_memory_pressure,
25 .leave_memory_pressure = tcp_leave_memory_pressure,
26 .stream_memory_free = tcp_stream_memory_free,
27 .sockets_allocated = &tcp_sockets_allocated,
28 .orphan_count = &tcp_orphan_count,
29 .memory_allocated = &tcp_memory_allocated,
30 .memory_pressure = &tcp_memory_pressure,
31 .sysctl_mem = sysctl_tcp_mem,
32 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
33 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
34 .max_header = MAX_TCP_HEADER,
35 .obj_size = sizeof(struct tcp_sock),
36 .slab_flags = SLAB_TYPESAFE_BY_RCU,
37 .twsk_prot = &tcp_timewait_sock_ops,
38 .rsk_prot = &tcp_request_sock_ops,
39 .h.hashinfo = &tcp_hashinfo,
40 .no_autobind = true,
41 ...
42 };
2.1 首先客户端发送SYN报文
tcp_v4_connect函数

1 /* This will initiate an outgoing connection. */
2 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
3 {
4 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
5 struct inet_sock *inet = inet_sk(sk);
6 struct tcp_sock *tp = tcp_sk(sk);
7 __be16 orig_sport, orig_dport;
8 __be32 daddr, nexthop;
9 struct flowi4 *fl4;
10 struct rtable *rt;
11 int err;
12 struct ip_options_rcu *inet_opt;
13 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
14
15 if (addr_len < sizeof(struct sockaddr_in))
16 return -EINVAL;
17
18 if (usin->sin_family != AF_INET)
19 return -EAFNOSUPPORT;
20
21 nexthop = daddr = usin->sin_addr.s_addr;
22 inet_opt = rcu_dereference_protected(inet->inet_opt,
23 lockdep_sock_is_held(sk));
24 if (inet_opt && inet_opt->opt.srr) {
25 if (!daddr)
26 return -EINVAL;
27 nexthop = inet_opt->opt.faddr;
28 }
29
30 orig_sport = inet->inet_sport;
31 orig_dport = usin->sin_port;
32 fl4 = &inet->cork.fl.u.ip4;
33 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
34 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
35 IPPROTO_TCP,
36 orig_sport, orig_dport, sk);
37 if (IS_ERR(rt)) {
38 err = PTR_ERR(rt);
39 if (err == -ENETUNREACH)
40 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
41 return err;
42 }
43
44 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
45 ip_rt_put(rt);
46 return -ENETUNREACH;
47 }
48
49 if (!inet_opt || !inet_opt->opt.srr)
50 daddr = fl4->daddr;
51
52 if (!inet->inet_saddr)
53 inet->inet_saddr = fl4->saddr;
54 sk_rcv_saddr_set(sk, inet->inet_saddr);
55
56 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
57 /* Reset inherited state */
58 tp->rx_opt.ts_recent = 0;
59 tp->rx_opt.ts_recent_stamp = 0;
60 if (likely(!tp->repair))
61 tp->write_seq = 0;
62 }
63
64 inet->inet_dport = usin->sin_port;
65 sk_daddr_set(sk, daddr);
66
67 inet_csk(sk)->icsk_ext_hdr_len = 0;
68 if (inet_opt)
69 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
70
71 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
72
73 /* Socket identity is still unknown (sport may be zero).
74 * However we set state to SYN-SENT and not releasing socket
75 * lock select source port, enter ourselves into the hash tables and
76 * complete initialization after this.
77 */
78 tcp_set_state(sk, TCP_SYN_SENT);
79 err = inet_hash_connect(tcp_death_row, sk);
80 if (err)
81 goto failure;
82
83 sk_set_txhash(sk);
84
85 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
86 inet->inet_sport, inet->inet_dport, sk);
87 if (IS_ERR(rt)) {
88 err = PTR_ERR(rt);
89 rt = NULL;
90 goto failure;
91 }
92 /* OK, now commit destination to socket. */
93 sk->sk_gso_type = SKB_GSO_TCPV4;
94 sk_setup_caps(sk, &rt->dst);
95 rt = NULL;
96
97 if (likely(!tp->repair)) {
98 if (!tp->write_seq)
99 tp->write_seq = secure_tcp_seq(inet->inet_saddr,
100 inet->inet_daddr,
101 inet->inet_sport,
102 usin->sin_port);
103 tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
104 inet->inet_saddr,
105 inet->inet_daddr);
106 }
107
108 inet->inet_id = tp->write_seq ^ jiffies;
109
110 if (tcp_fastopen_defer_connect(sk, &err))
111 return err;
112 if (err)
113 goto failure;
114
115 err = tcp_connect(sk);
116
117 if (err)
118 goto failure;
119
120 return 0;
121
122 failure:
123 /*
124 * This unhashes the socket and releases the local port,
125 * if necessary.
126 */
127 tcp_set_state(sk, TCP_CLOSE);
128 ip_rt_put(rt);
129 sk->sk_route_caps = 0;
130 inet->inet_dport = 0;
131 return err;
132 }
2.2 另一端,服务端调用accept等待连接请求

1 /*
2 * This will accept the next outstanding connection.
3 */
4 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
5 {
6 struct inet_connection_sock *icsk = inet_csk(sk);
7 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
8 struct request_sock *req;
9 struct sock *newsk;
10 int error;
11
12 lock_sock(sk);
13
14 /* We need to make sure that this socket is listening,
15 * and that it has something pending.
16 */
17 error = -EINVAL;
18 if (sk->sk_state != TCP_LISTEN)
19 goto out_err;
20
21 /* Find already established connection */
22 if (reqsk_queue_empty(queue)) {
23 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
24
25 /* If this is a non blocking socket don't sleep */
26 error = -EAGAIN;
27 if (!timeo)
28 goto out_err;
29
30 error = inet_csk_wait_for_connect(sk, timeo);
31 if (error)
32 goto out_err;
33 }
34 req = reqsk_queue_remove(queue, sk);
35 newsk = req->sk;
36
37 if (sk->sk_protocol == IPPROTO_TCP &&
38 tcp_rsk(req)->tfo_listener) {
39 spin_lock_bh(&queue->fastopenq.lock);
40 if (tcp_rsk(req)->tfo_listener) {
41 /* We are still waiting for the final ACK from 3WHS
42 * so can't free req now. Instead, we set req->sk to
43 * NULL to signify that the child socket is taken
44 * so reqsk_fastopen_remove() will free the req
45 * when 3WHS finishes (or is aborted).
46 */
47 req->sk = NULL;
48 req = NULL;
49 }
50 spin_unlock_bh(&queue->fastopenq.lock);
51 }
52 out:
53 release_sock(sk);
54 if (req)
55 reqsk_put(req);
56 return newsk;
57 out_err:
58 newsk = NULL;
59 req = NULL;
60 *err = error;
61 goto out;
62 }
inet_csk_wait_for_connect函数
1 /*
2 * Wait for an incoming connection, avoid race conditions. This must be called
3 * with the socket locked.
4 */
5 static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
6 {
7 struct inet_connection_sock *icsk = inet_csk(sk);
8 DEFINE_WAIT(wait);
9 int err;
10
11 /*
12 * True wake-one mechanism for incoming connections: only
13 * one process gets woken up, not the 'whole herd'.
14 * Since we do not 'race & poll' for established sockets
15 * anymore, the common case will execute the loop only once.
16 *
17 * Subtle issue: "add_wait_queue_exclusive()" will be added
18 * after any current non-exclusive waiters, and we know that
19 * it will always _stay_ after any new non-exclusive waiters
20 * because all non-exclusive waiters are added at the
21 * beginning of the wait-queue. As such, it's ok to "drop"
22 * our exclusiveness temporarily when we get woken up without
23 * having to remove and re-insert us on the wait queue.
24 */
25 for (;;) {
26 prepare_to_wait_exclusive(sk_sleep(sk), &wait,
27 TASK_INTERRUPTIBLE);
28 release_sock(sk);
29 if (reqsk_queue_empty(&icsk->icsk_accept_queue))
30 timeo = schedule_timeout(timeo);
31 sched_annotate_sleep();
32 lock_sock(sk);
33 err = 0;
34 if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
35 break;
36 err = -EINVAL;
37 if (sk->sk_state != TCP_LISTEN)
38 break;
39 err = sock_intr_errno(timeo);
40 if (signal_pending(current))
41 break;
42 err = -EAGAIN;
43 if (!timeo)
44 break;
45 }
46 finish_wait(sk_sleep(sk), &wait);
47 return err;
48 }
2.3 三次握手中携带SYN/ACK的TCP头数据的发送和接收
TCP/IP协议栈初始化
inet_init_net函数
1 static __net_init int inet_init_net(struct net *net)
2 {
3 /*
4 * Set defaults for local port range
5 */
6 seqlock_init(&net->ipv4.ip_local_ports.lock);
7 net->ipv4.ip_local_ports.range[0] = 32768;
8 net->ipv4.ip_local_ports.range[1] = 60999;
9
10 seqlock_init(&net->ipv4.ping_group_range.lock);
11 /*
12 * Sane defaults - nobody may create ping sockets.
13 * Boot scripts should set this to distro-specific group.
14 */
15 net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
16 net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
17
18 /* Default values for sysctl-controlled parameters.
19 * We set them here, in case sysctl is not compiled.
20 */
21 net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
22 net->ipv4.sysctl_ip_fwd_update_priority = 1;
23 net->ipv4.sysctl_ip_dynaddr = 0;
24 net->ipv4.sysctl_ip_early_demux = 1;
25 net->ipv4.sysctl_udp_early_demux = 1;
26 net->ipv4.sysctl_tcp_early_demux = 1;
27 #ifdef CONFIG_SYSCTL
28 net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
29 #endif
30
31 /* Some igmp sysctl, whose values are always used */
32 net->ipv4.sysctl_igmp_max_memberships = 20;
33 net->ipv4.sysctl_igmp_max_msf = 10;
34 /* IGMP reports for link-local multicast groups are enabled by default */
35 net->ipv4.sysctl_igmp_llm_reports = 1;
36 net->ipv4.sysctl_igmp_qrv = 2;
37
38 return 0;
39 }
2.4 服务端接收客户端发来的SYN,发送SYN+ACK
tcp_v4_do_rcv函数
1 /* The socket must have it's spinlock held when we get
2 * here, unless it is a TCP_LISTEN socket.
3 *
4 * We have a potential double-lock case here, so even when
5 * doing backlog processing we use the BH locking scheme.
6 * This is because we cannot sleep with the original spinlock
7 * held.
8 */
9 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
10 {
11 struct sock *rsk;
12
13 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
14 struct dst_entry *dst = sk->sk_rx_dst;
15
16 sock_rps_save_rxhash(sk, skb);
17 sk_mark_napi_id(sk, skb);
18 if (dst) {
19 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
20 !dst->ops->check(dst, 0)) {
21 dst_release(dst);
22 sk->sk_rx_dst = NULL;
23 }
24 }
25 tcp_rcv_established(sk, skb);
26 return 0;
27 }
28
29 if (tcp_checksum_complete(skb))
30 goto csum_err;
31
32 if (sk->sk_state == TCP_LISTEN) {
33 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
34
35 if (!nsk)
36 goto discard;
37 if (nsk != sk) {
38 if (tcp_child_process(sk, nsk, skb)) {
39 rsk = nsk;
40 goto reset;
41 }
42 return 0;
43 }
44 } else
45 sock_rps_save_rxhash(sk, skb);
46
47 if (tcp_rcv_state_process(sk, skb)) {
48 rsk = sk;
49 goto reset;
50 }
51 return 0;
52
53 reset:
54 tcp_v4_send_reset(rsk, skb);
55 discard:
56 kfree_skb(skb);
57 /* Be careful here. If this function gets more complicated and
58 * gcc suffers from register pressure on the x86, sk (in %ebx)
59 * might be destroyed here. This current version compiles correctly,
60 * but you have been warned.
61 */
62 return 0;
63
64 csum_err:
65 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
66 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
67 goto discard;
68 }
2.5 客户端收到服务端的SYN+ACK,发送ACK
tcp_rcv_synsent_state_process函数
1 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
2 const struct tcphdr *th, unsigned int len)
3 {
4 ...
5 tcp_send_ack(sk);
6 ...
7 }
至此,我们已经从Linux内核网络子系统的角度,在架构上整体梳理了三次握手,即携带SYN/ACK标志的报文的收发过程。
3、gdb调试过程


(gdb) c
Continuing.
Breakpoint 1, __sys_socket (family=2, type=1, protocol=0) at net/socket.c:1346
1346 retval = sock_create(family, type, protocol, &sock);
(gdb) c
Continuing.
Breakpoint 2, __sys_accept4 (fd=4, upeer_sockaddr=0xffbb869c,
upeer_addrlen=0xffbb867c, flags=0) at net/socket.c:1542
1542 {
(gdb) c
Continuing.
Breakpoint 1, __sys_socket (family=2, type=1, protocol=0) at net/socket.c:1346
1346 retval = sock_create(family, type, protocol, &sock);
(gdb) c
Continuing.
Breakpoint 3, tcp_v4_connect (sk=0xffff888006498880, uaddr=0xffffc90000043e20,
addr_len=16) at net/ipv4/tcp_ipv4.c:203
203 {
(gdb) c
Continuing.
Breakpoint 4, tcp_v4_rcv (skb=0xffff8880068ed4e0) at net/ipv4/tcp_ipv4.c:1782
1782 {
(gdb) c
Continuing.
Breakpoint 4, tcp_v4_rcv (skb=0xffff888007584000) at net/ipv4/tcp_ipv4.c:1782
1782 {
(gdb) c
Continuing.
Breakpoint 4, tcp_v4_rcv (skb=0xffff888007584100) at net/ipv4/tcp_ipv4.c:1782
1782 {
(gdb) c
Continuing.
Breakpoint 4, tcp_v4_rcv (skb=0xffff8880068ed4e0) at net/ipv4/tcp_ipv4.c:1782
1782 {
(gdb) c
Continuing.
Breakpoint 4, tcp_v4_rcv (skb=0xffff888007584100) at net/ipv4/tcp_ipv4.c:1782
1782 {
(gdb) c
Continuing.
Breakpoint 2, __sys_accept4 (fd=4, upeer_sockaddr=0xffbb869c,
upeer_addrlen=0xffbb867c, flags=0) at net/socket.c:1542
1542 {
(gdb)
来源:https://www.cnblogs.com/yusi007/p/12102320.html