应用程序通过 setsockopt() 改变 socket 的拥塞控制算法:
/*
 * Userspace fragment: select the "cubic" congestion control algorithm for
 * the socket connfd. `name` and `connfd` are declared outside this fragment.
 *
 * The option level and option name passed to setsockopt() are defined by
 * hand here.
 */
#define SOL_TCP 6
 #define TCP_CONGESTION  13
 
 /* NOTE(review): assumes `name` is large enough for "cubic" plus the NUL - confirm. */
 strcpy(name, "cubic");
 /*
  * The length passed is strlen(name), i.e. without the terminating NUL.
  * NOTE(review): the return value is ignored, so a failure to switch
  * algorithms goes unnoticed here.
  */
 setsockopt (connfd, SOL_TCP, TCP_CONGESTION, name, strlen(name)); 
 
net/socket.c:
/*
 * setsockopt system call entry point (excerpt: "..." marks code that this
 * listing leaves out).
 *
 * The visible statement forwards the request to the setsockopt handler
 * stored in the socket's ops table and stores the handler's result in err.
 */
SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
 		char __user *, optval, int, optlen)
 {
 	...
 			/* Dispatch through the per-socket operations table. */
 			err =
 				sock->ops->setsockopt(sock, level, optname, optval,
 						  optlen);
 	...
 } 
 
对于 IPv4 的 TCP,sock->ops 指向 net/ipv4/af_inet.c 中的 inet_stream_ops,所以 sock->ops->setsockopt 就是 sock_common_setsockopt。
net/core/sock.c:
/*
 * Generic socket-layer setsockopt handler: hands the request to the
 * setsockopt callback of the protocol attached to the socket's struct sock
 * (sk->sk_prot) and returns that callback's result unchanged.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *owner = sock->sk;
	int ret;

	ret = owner->sk_prot->setsockopt(owner, level, optname, optval,
					 optlen);
	return ret;
}
 
sk_prot 指向 net/ipv4/tcp_ipv4.c 中的 tcp_prot,所以 sk->sk_prot->setsockopt 就是 tcp_setsockopt。
net/ipv4/tcp.c:
/*
 * TCP setsockopt entry point.
 *
 * Requests whose level is SOL_TCP are handled by do_tcp_setsockopt(); any
 * other level is passed on to the setsockopt handler found in
 * icsk->icsk_af_ops. The result of whichever handler ran is returned.
 */
int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
		   unsigned int optlen)
{
	struct inet_connection_sock *conn = inet_csk(sk);

	/* TCP-level option: handled locally. */
	if (level == SOL_TCP)
		return do_tcp_setsockopt(sk, level, optname, optval, optlen);

	/* Any other level: delegate to the icsk_af_ops handler. */
	return conn->icsk_af_ops->setsockopt(sk, level, optname,
					     optval, optlen);
}
 
因为 level == SOL_TCP 且 optname == TCP_CONGESTION,所以会进入 do_tcp_setsockopt,并命中其中的第一个 if 分支:
/*
 * Handler for TCP-level socket options (excerpt: only the TCP_CONGESTION
 * branch is shown; "..." marks the rest of the function, left out of this
 * listing).
 *
 * For TCP_CONGESTION the option value is the algorithm name as a string.
 * Returns -EINVAL if optlen < 1, -EFAULT if the name cannot be copied from
 * user space, otherwise the result of tcp_set_congestion_control().
 */
static int do_tcp_setsockopt(struct sock *sk, int level,
 		int optname, char __user *optval, unsigned int optlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk); 
 	int val;    
 	int err = 0;
 
 	/* This is a string value all the others are int's */
 	if (optname == TCP_CONGESTION) {    
 		char name[TCP_CA_NAME_MAX]; 
 
 		if (optlen < 1)
 			return -EINVAL;
 
 		/*
 		 * Copy at most TCP_CA_NAME_MAX-1 bytes so that one byte of
 		 * name[] always remains for the terminator written below.
 		 */
 		val = strncpy_from_user(name, optval,
 					min_t(long, TCP_CA_NAME_MAX-1, optlen));
 		if (val < 0)
 			return -EFAULT;
 		/* val is used as the index at which the string is terminated. */
 		name[val] = 0;
 
 		/* The algorithm switch is done with the socket locked. */
 		lock_sock(sk);
 		err = tcp_set_congestion_control(sk, name);
 		release_sock(sk);
 		return err;
 	}
 
 ...
 
 
net/ipv4/tcp_cong.c:
/*
 * Change congestion control for socket.
 *
 * Looks up the algorithm called @name and, if it is not the one the socket
 * already uses, installs it in icsk->icsk_ca_ops.
 *
 * Returns 0 on success or when @name is already the active algorithm,
 * -ENOENT if no algorithm with that name is found, -EPERM if the algorithm
 * is not flagged TCP_CONG_NON_RESTRICTED and the caller lacks
 * CAP_NET_ADMIN, and -EBUSY if a reference on the owning module cannot be
 * taken.
 */
 int tcp_set_congestion_control(struct sock *sk, const char *name)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_congestion_ops *ca;
 	int err = 0;
 
 	/* The lookup and the checks below run under the RCU read lock. */
 	rcu_read_lock();
 	ca = tcp_ca_find(name);
 
 	/* no change asking for existing value */
 	if (ca == icsk->icsk_ca_ops)
 		goto out;
 
 #ifdef CONFIG_MODULES
 	/* not found attempt to autoload module */
 	if (!ca && capable(CAP_NET_ADMIN)) {
 		/*
 		 * The RCU read lock is dropped around request_module() and
 		 * the lookup is repeated afterwards.
 		 * NOTE(review): presumably because request_module() may
 		 * sleep - confirm.
 		 */
 		rcu_read_unlock();
 		request_module("tcp_%s", name);
 		rcu_read_lock();
 		ca = tcp_ca_find(name);
 	}
 #endif
 	if (!ca)
 		err = -ENOENT;
 
 	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
 		err = -EPERM;
 
 	else if (!try_module_get(ca->owner))
 		err = -EBUSY;
 
 	else {
 		/* Tear down the current algorithm, then switch the ops pointer. */
 		tcp_cleanup_congestion_control(sk);
 		icsk->icsk_ca_ops = ca;
 
 		/*
 		 * If sk->sk_state == TCP_CLOSE, the init callback of the new
 		 * congestion control module is NOT called.
 		 * NOTE(review): per the call trace later in this document,
 		 * the module's release callback is still reached through
 		 * tcp_cleanup_congestion_control() when the socket is
 		 * destroyed.
 		 */
 		if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
 			icsk->icsk_ca_ops->init(sk);
 	}
  out:
 	rcu_read_unlock();
 	return err;
 } 
 
可以看到,如果 sk->sk_state == TCP_CLOSE,那么新拥塞控制模块的 init 不会被调用。
 
那么什么时候 sk->sk_state == TCP_CLOSE,并且应用还能调用 setsockopt 呢?
举一种情况:当连接收到 RST 包时,tcp_rcv_established()->tcp_validate_incoming()->tcp_reset()->tcp_done() 会将 sk 置为 TCP_CLOSE;此时应用尚未 close(),fd 仍然有效,所以仍然可以对它调用 setsockopt。
如果拥塞控制模块在 init 中申请内存、在 release 中释放内存,那么在上述情况下切换到该模块时 init 被跳过,而之后关闭 socket 时 tcp_cleanup_congestion_control() 仍会调用它的 release,于是出现没有申请就直接释放的情况,导致 panic。
panic 时的调用栈如下:
BUG: unable to handle kernel paging request at ffffeba4000002a0
 
 [<ffffffff8115b17e>] kfree+0x6e/0x240
 [<ffffffffa0068055>] cong_release+0x35/0x50 [cong]
 [<ffffffff81467953>] tcp_cleanup_congestion_control+0x23/0x40
 [<ffffffff81465bb9>] tcp_v4_destroy_sock+0x29/0x2d0
 [<ffffffff8144e9e3>] inet_csk_destroy_sock+0x53/0x140
 [<ffffffff814504c0>] tcp_close+0x340/0x4a0
 [<ffffffff814748de>] inet_release+0x5e/0x90
 [<ffffffff813f4359>] sock_release+0x29/0x90
 [<ffffffff813f43d7>] sock_close+0x17/0x40
 [<ffffffff81173ed3>] __fput+0xf3/0x220
 [<ffffffff8117401c>] fput+0x1c/0x30
 [<ffffffff8116df2d>] filp_close+0x5d/0x90
 [<ffffffff8117090c>] sys_close+0xac/0x110
 [<ffffffff8100af72>] system_call_fastpath+0x16/0x1b 
 
测试代码 
congestion_mod_panic