写入已关闭的本地 TCP 套接字不会失败-IT科技

摘要：问题描述：我的套接字似乎有问题。下面，您将看到一些分叉服务器和客户端的代码。服务器打开一个 TCP 套接字，客户端连接到它然后关闭它。使用休眠来协调时间。在客户端 close() 之后，服务器尝试 write() 到其自己的 TCP 连接端。根据 write(2) 手册页，这应该给我一个 SIGPIPE 和一...

问题描述：

我的套接字似乎有问题。下面，您将看到一些分叉服务器和客户端的代码。服务器打开一个 TCP 套接字，客户端连接到它然后关闭它。使用休眠来协调时间。在客户端 close() 之后，服务器尝试 write() 到其自己的 TCP 连接端。根据 write(2) 手册页，这应该给我一个 SIGPIPE 和一个 EPIPE errno。但是，我没有看到这一点。从服务器的角度来看，写入本地关闭的套接字成功，如果没有 EPIPE，我看不出服务器应该如何检测客户端是否已关闭套接字。

在客户端关闭其一端和服务器尝试写入之间的间隙中，对 netstat 的调用将显示连接处于 CLOSE_WAIT/FIN_WAIT2 状态，因此服务器端肯定能够拒绝写入。

作为参考，我在 Debian Squeeze 上，uname -r 是 2.6.39-bpo.2-amd64。

这里发生了什么事？

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>

#include <netdb.h>

/* Loopback address and TCP port used by both the forked server and client. */
#define SERVER_ADDRESS "127.0.0.7"
#define SERVER_PORT 4777


/*
 * If `test` is true, print `msg` followed by a newline to stderr and exit(1).
 * `msg` must be a string literal: it is joined with "\n" by literal
 * concatenation. fputs is used so a '%' in the message is printed verbatim.
 */
#define myfail_if( test, msg ) do { if((test)){ fputs( msg "\n", stderr ); exit(1); } } while (0)
/* Inverse of myfail_if: fail when `test` is false. */
#define myfail_unless( test, msg ) myfail_if( !(test), msg )

/*
 * Resolve `addr` and open a TCP connection to it on `actual_port`.
 *
 * Every address returned by getaddrinfo() is tried in order until one
 * connect() succeeds. Returns the connected socket descriptor; if resolution
 * fails or no address can be connected, the process exits via myfail_*.
 */
int connect_client( char *addr, int actual_port )
{
    int client_fd = -1;

    struct addrinfo hint;
    struct addrinfo *ailist, *aip;


    memset( &hint, '\0', sizeof( struct addrinfo ) );
    hint.ai_socktype = SOCK_STREAM;

    myfail_if( getaddrinfo( addr, NULL, &hint, &ailist ) != 0, "getaddrinfo failed." );

    int connected = 0;
    for( aip = ailist; aip; aip = aip->ai_next ) {
        /* No service was passed to getaddrinfo, so the port is patched in here.
         * NOTE(review): the cast assumes an AF_INET result - confirm if `addr`
         * can ever resolve to another family. */
        ((struct sockaddr_in *)aip->ai_addr)->sin_port = htons( actual_port );
        client_fd = socket( aip->ai_family, aip->ai_socktype, aip->ai_protocol );

        if( client_fd == -1) { continue; }
        if( connect( client_fd, aip->ai_addr, aip->ai_addrlen) == 0 ) {
            connected = 1;
            break;
        }
        /* This candidate failed: release it and forget the stale descriptor. */
        close( client_fd );
        client_fd = -1;
    }

    freeaddrinfo( ailist );

    myfail_unless( connected, "Didn't connect." );
    return client_fd;
}


/*
 * Client process body: wait one second, connect to the server, close the
 * connection straight away, and exit with status 0.
 */
void client(){
    /* Give the server process time to bind and listen before connecting. */
    sleep(1);
    int client_fd = connect_client( SERVER_ADDRESS, SERVER_PORT );

    printf("Client closing its fd... ");
    myfail_unless( 0 == close( client_fd ), "close failed" );
    fprintf(stdout, "Client exiting.\n");
    exit(0);
}


/*
 * Create a TCP socket of saddr's address family and bind it to `saddr`.
 *
 * Returns the bound descriptor, or the negative value from socket() if the
 * socket could not be created. A bind() failure terminates the process.
 */
int init_server( struct sockaddr * saddr, socklen_t saddr_len )
{
    int sock_fd;

    sock_fd = socket( saddr->sa_family, SOCK_STREAM, 0 );
    if ( sock_fd < 0 ){
        return sock_fd;
    }

    myfail_unless( bind( sock_fd, saddr, saddr_len ) == 0, "Failed to bind." );
    return sock_fd;
}

/*
 * Resolve `addr`, bind a TCP socket to the first usable result on `port`,
 * and put it into the listening state (backlog 2).
 *
 * Returns the listening descriptor. Resolution, bind or listen failures
 * terminate the process via myfail_*.
 */
int start_server( const char * addr, int port )
{
    struct addrinfo *ailist, *aip;
    struct addrinfo hint;
    /* Stays -1 if no address yields a socket, so listen() below fails cleanly. */
    int sock_fd = -1;

    memset( &hint, '\0', sizeof( struct addrinfo ) );
    hint.ai_socktype = SOCK_STREAM;
    myfail_if( getaddrinfo( addr, NULL, &hint, &ailist ) != 0, "getaddrinfo failed." );

    for( aip = ailist; aip; aip = aip->ai_next ){
        /* NOTE(review): the cast assumes an AF_INET result - confirm for other families. */
        ((struct sockaddr_in *)aip->ai_addr)->sin_port = htons( port );
        sock_fd = init_server( aip->ai_addr, aip->ai_addrlen );
        /* 0 is a valid descriptor; only negative values signal failure. */
        if ( sock_fd >= 0 ){
            break;
        }
    }
    /* Free the head of the list, not the loop cursor (which may be NULL or mid-list). */
    freeaddrinfo( ailist );

    myfail_unless( listen( sock_fd, 2 ) == 0, "Failed to listen" );
    return sock_fd;
}


/*
 * Block until a client connects to `server_fd` and return the descriptor of
 * the accepted connection. An accept() failure terminates the process.
 */
int server_accept( int server_fd )
{
    printf("Accepting\n");
    int client_fd = accept( server_fd, NULL, NULL );
    /* accept() reports failure with -1; descriptor 0 is a legitimate result. */
    myfail_unless( client_fd >= 0, "Failed to accept" );
    return client_fd;
}


/*
 * Server process body: accept one client, sleep while the client closes its
 * end, then write three bytes to the connection and report the outcome.
 */
void server() {
    int server_fd = start_server(SERVER_ADDRESS, SERVER_PORT);
    int client_fd = server_accept( server_fd );

    printf("Server sleeping\n");
    /* The client connects after one second and closes at once, so it is long
     * gone by the time this sleep ends. */
    sleep(60);

    printf( "Errno before: %s\n", strerror( errno ) );
    /* Capture the result and errno right after write(): the printf calls that
     * follow are themselves allowed to modify errno. */
    ssize_t written = write( client_fd, "123", 3 );
    int write_errno = errno;
    printf( "Write result: %zd\n", written );
    printf( "Errno after:  %s\n", strerror( write_errno ) );

    close( client_fd );
    close( server_fd );
}


/*
 * Fork one client and one server process, wait for both, and print their raw
 * wait statuses (as filled in by waitpid, not decoded exit codes).
 */
int main(void){
    pid_t clientpid;
    pid_t serverpid;

    clientpid = fork();
    if ( clientpid == -1 ) {
        perror( "fork" );
        return 1;
    }

    if ( clientpid == 0 ) {
        client();
    } else {
        serverpid = fork();
        if ( serverpid == -1 ) {
            perror( "fork" );
            /* Reap the client that is already running before giving up. */
            waitpid( clientpid, NULL, 0 );
            return 1;
        }

        if ( serverpid == 0 ) {
            server();
        }
        else {
            int clientstatus;
            int serverstatus;

            waitpid( clientpid, &clientstatus, 0 );
            waitpid( serverpid, &serverstatus, 0 );

            printf( "Client status is %d, server status is %d\n", 
                    clientstatus, serverstatus );
        }
    }

    return 0;
}

解决方案 1：

Linux 的 write 手册页中关于 EPIPE 的说明如下：

   EPIPE  fd is connected to a pipe or socket whose reading end is closed.
          When this happens the writing process will also receive  a  SIG-
          PIPE  signal.  (Thus, the write return value is seen only if the
          program catches, blocks or ignores this signal.)

当使用 pipe 或 socketpair 时，Linux 能够并且确实会检查这一对描述符的读取端，正如下面这两个程序所演示的那样：

/*
 * Show that sending on a local socket pair whose other end is closed fails
 * immediately: close pair[0], then send one byte on pair[1] with
 * MSG_NOSIGNAL and print the resulting error.
 */
void test_socketpair () {
    int pair[2];
    if (socketpair(PF_LOCAL, SOCK_STREAM, 0, pair) != 0) {
        perror("socketpair");
        return;
    }
    close(pair[0]);
    if (send(pair[1], "a", 1, MSG_NOSIGNAL) < 0) perror("send");
    close(pair[1]);
}

/*
 * Show that writing to a pipe whose read end is closed fails immediately.
 * SIGPIPE is ignored around the write so that the error is reported through
 * the return value instead of terminating the process.
 */
void test_pipe () {
    int pair[2];
    if (pipe(pair) != 0) {
        perror("pipe");
        return;
    }
    close(pair[0]);
    /* Remember the previous disposition so it can be restored afterwards. */
    void (*previous)(int) = signal(SIGPIPE, SIG_IGN);
    if (write(pair[1], "a", 1) < 0) perror("send");
    signal(SIGPIPE, previous);
    close(pair[1]);
}

Linux 能够做到这一点，是因为内核天然了解管道或已连接套接字对的另一端。但是，在使用 connect 建立连接时，套接字的状态由协议栈维护。您的测试已经演示了这种行为，下面是一个在单个线程中完成所有操作的程序，与上面的两个测试类似：

int a_sock = socket(PF_INET, SOCK_STREAM, 0);
const int one = 1;
setsockopt(a_sock, SOL_SOCKET, SO_REUSEADDR, &amp;one, sizeof(one));
struct sockaddr_in a_sin = {0};
a_sin.sin_port = htons(4321);
a_sin.sin_family = AF_INET;
a_sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
bind(a_sock, (struct sockaddr *)&amp;a_sin, sizeof(a_sin));
listen(a_sock, 1);
int c_sock = socket(PF_INET, SOCK_STREAM, 0);
fcntl(c_sock, F_SETFL, fcntl(c_sock, F_GETFL, 0)|O_NONBLOCK);
connect(c_sock, (struct sockaddr *)&amp;a_sin, sizeof(a_sin));
fcntl(c_sock, F_SETFL, fcntl(c_sock, F_GETFL, 0)&amp;~O_NONBLOCK);
struct sockaddr_in s_sin = {0};
socklen_t s_sinlen = sizeof(s_sin);
int s_sock = accept(a_sock, (struct sockaddr *)&amp;s_sin, &amp;s_sinlen);
struct pollfd c_pfd = { c_sock, POLLOUT, 0 };
if (poll(&amp;c_pfd, 1, -1) != 1) perror(&quot;poll&quot;);
int erropt = -1;
socklen_t errlen = sizeof(erropt);
getsockopt(c_sock, SOL_SOCKET, SO_ERROR, &amp;erropt, &amp;errlen);
if (erropt != 0) { errno = erropt; perror(&quot;connect&quot;); }
puts(&quot;P|Recv-Q|Send-Q|Local Address|Foreign Address|State|&quot;);
char cmd[256];
snprintf(cmd, sizeof(cmd), &quot;netstat -tn | grep &#039;:%hu &#039; | sed &#039;s/  */|/g&#039;&quot;,
         ntohs(s_sin.sin_port));
puts(&quot;before close on client&quot;); system(cmd);
close(c_sock);
puts(&quot;after close on client&quot;); system(cmd);
if (send(s_sock, &quot;a&quot;, 1, MSG_NOSIGNAL) &lt; 0) perror(&quot;send&quot;);
puts(&quot;after send on server&quot;); system(cmd);
puts(&quot;end of test&quot;);
sleep(5);

如果你运行上述程序，你将得到类似这样的输出：

P|Recv-Q|Send-Q|Local Address|Foreign Address|State|
before close on client
tcp|0|0|127.0.0.1:35790|127.0.0.1:4321|ESTABLISHED|
tcp|0|0|127.0.0.1:4321|127.0.0.1:35790|ESTABLISHED|
after close on client
tcp|0|0|127.0.0.1:35790|127.0.0.1:4321|FIN_WAIT2|
tcp|1|0|127.0.0.1:4321|127.0.0.1:35790|CLOSE_WAIT|
after send on server
end of test

这表明套接字需要经过一次 write 才会转换到 CLOSED 状态。要找出发生这种情况的原因，查看这次交互的 TCP 转储会很有帮助：

16:45:28 127.0.0.1 > 127.0.0.1
 .809578 IP .35790 > .4321: S 1062313174:1062313174(0) win 32792 <mss 16396,sackOK,timestamp 3915671437 0,nop,wscale 7>
 .809715 IP .4321 > .35790: S 1068622806:1068622806(0) ack 1062313175 win 32768 <mss 16396,sackOK,timestamp 3915671437 3915671437,nop,wscale 7>
 .809583 IP .35790 > .4321: . ack 1 win 257 <nop,nop,timestamp 3915671437 3915671437>
 .840364 IP .35790 > .4321: F 1:1(0) ack 1 win 257 <nop,nop,timestamp 3915671468 3915671437>
 .841170 IP .4321 > .35790: . ack 2 win 256 <nop,nop,timestamp 3915671469 3915671468>
 .865792 IP .4321 > .35790: P 1:2(1) ack 2 win 256 <nop,nop,timestamp 3915671493 3915671468>
 .865809 IP .35790 > .4321: R 1062313176:1062313176(0) win 0

前三行代表三次握手。第四行是客户端发送给服务器的 FIN 数据包，第五行是服务器确认收到该 FIN 的 ACK 数据包。第六行是服务器尝试向客户端发送 1 字节数据，该数据包设置了 PUSH 标志。最后一行是客户端发出的 RESET 数据包，它导致该连接的 TCP 状态被释放，这就是上面测试中第三条 netstat 命令没有产生任何输出的原因。

因此，服务器直到尝试向其发送一些数据后才知道客户端将重置连接。重置的原因是客户端调用了close，而不是其他东西。

服务器无法确切知道客户端实际发出了什么系统调用，它只能跟踪 TCP 状态。例如，我们可以把对 close 的调用替换为对 shutdown 的调用。

// Variant of the test program: instead of closing the client descriptor,
// shut down only its write side. The descriptor c_sock itself stays open.
//close(c_sock);
shutdown(c_sock, SHUT_WR);

shutdown 和 close 之间的区别在于：shutdown 仅控制连接的状态，而 close 还控制代表该套接字的文件描述符的状态。shutdown 不会关闭套接字。

输出将会随着shutdown改变而不同：

P|Recv-Q|Send-Q|Local Address|Foreign Address|State|
before close on client
tcp|0|0|127.0.0.1:4321|127.0.0.1:56355|ESTABLISHED|
tcp|0|0|127.0.0.1:56355|127.0.0.1:4321|ESTABLISHED|
after close on client
tcp|1|0|127.0.0.1:4321|127.0.0.1:56355|CLOSE_WAIT|
tcp|0|0|127.0.0.1:56355|127.0.0.1:4321|FIN_WAIT2|
after send on server
tcp|1|0|127.0.0.1:4321|127.0.0.1:56355|CLOSE_WAIT|
tcp|1|0|127.0.0.1:56355|127.0.0.1:4321|FIN_WAIT2|
end of test

TCP 转储还会显示一些不同的东西：

17:09:18 127.0.0.1 > 127.0.0.1
 .722520 IP .56355 > .4321: S 2558095134:2558095134(0) win 32792 <mss 16396,sackOK,timestamp 3917101399 0,nop,wscale 7>
 .722594 IP .4321 > .56355: S 2563862019:2563862019(0) ack 2558095135 win 32768 <mss 16396,sackOK,timestamp 3917101399 3917101399,nop,wscale 7>
 .722615 IP .56355 > .4321: . ack 1 win 257 <nop,nop,timestamp 3917101399 3917101399>
 .748838 IP .56355 > .4321: F 1:1(0) ack 1 win 257 <nop,nop,timestamp 3917101425 3917101399>
 .748956 IP .4321 > .56355: . ack 2 win 256 <nop,nop,timestamp 3917101426 3917101425>
 .764894 IP .4321 > .56355: P 1:2(1) ack 2 win 256 <nop,nop,timestamp 3917101442 3917101425>
 .764903 IP .56355 > .4321: . ack 2 win 257 <nop,nop,timestamp 3917101442 3917101442>
17:09:23
 .786921 IP .56355 > .4321: R 2:2(0) ack 2 win 257 <nop,nop,timestamp 3917106464 3917101442>

请注意，最后的重置发生在最后一个 ACK 数据包发送 5 秒之后。此重置是由于程序退出时没有正确关闭套接字造成的。与之前不同的是重置之前那个从客户端发往服务器的 ACK 数据包，它表明客户端没有使用 close。在 TCP 中，FIN 实际上只表示发送方没有更多数据要发送。但由于 TCP 连接是双向的，收到 FIN 的服务器会假定客户端仍然可以接收数据。在上述情况下，客户端确实接受了这些数据。

无论客户端是使用 close 还是 SHUT_WR 来发出 FIN，您都可以通过在服务器套接字上轮询可读事件来检测 FIN 的到达。如果调用 read 后的返回值是 0，那么您就知道 FIN 已经到达，并且可以根据这一信息执行您想要的操作。

/* Wait for the server-side socket to become readable or writable. */
struct pollfd s_pfd = { s_sock, POLLIN|POLLOUT, 0 };
if (poll(&s_pfd, 1, -1) != 1) perror("poll");
/* Test the POLLIN bit with '&'; '|' would make the condition always true. */
if (s_pfd.revents & POLLIN) {
    char c;
    int r;
    /* Drain pending bytes one at a time without blocking. */
    while ((r = recv(s_sock, &c, 1, MSG_DONTWAIT)) == 1) {}
    if (r == 0) { /*...FIN received...*/ }
    else if (errno == EAGAIN) { /*...no more data to read for now...*/ }
    else { /*...some other error...*/ perror("recv"); }
}

显然，如果服务器在尝试写入之前先用 shutdown 对自己的套接字执行 SHUT_WR，那么它确实会得到 EPIPE 错误。

/* Shut down the server's own write side first; the send that follows then
 * fails. MSG_NOSIGNAL reports the failure through errno instead of SIGPIPE. */
shutdown(s_sock, SHUT_WR);
if (send(s_sock, "a", 1, MSG_NOSIGNAL) < 0) perror("send");

相反，如果您希望客户端让服务器立即收到重置，则可以在调用 close 之前启用 linger 选项并将 linger 超时设置为 0，在大多数 TCP 协议栈上这会强制发送重置。

/* Linger enabled with a zero timeout: close() then sends a reset instead of
 * going through the normal FIN handshake. */
struct linger lo = { .l_onoff = 1, .l_linger = 0 };
setsockopt(c_sock, SOL_SOCKET, SO_LINGER, &lo, sizeof(lo));
close(c_sock);

经过上述更改，程序的输出变为：

P|Recv-Q|Send-Q|Local Address|Foreign Address|State|
before close on client
tcp|0|0|127.0.0.1:35043|127.0.0.1:4321|ESTABLISHED|
tcp|0|0|127.0.0.1:4321|127.0.0.1:35043|ESTABLISHED|
after close on client
send: Connection reset by peer
after send on server
end of test

在这种情况下，send 会立即返回错误，但该错误不是 EPIPE，而是 ECONNRESET。TCP 转储也反映了这一点：

17:44:21 127.0.0.1 > 127.0.0.1
 .662163 IP .35043 > .4321: S 498617888:498617888(0) win 32792 <mss 16396,sackOK,timestamp 3919204411 0,nop,wscale 7>
 .662176 IP .4321 > .35043: S 497680435:497680435(0) ack 498617889 win 32768 <mss 16396,sackOK,timestamp 3919204411 3919204411,nop,wscale 7>
 .662184 IP .35043 > .4321: . ack 1 win 257 <nop,nop,timestamp 3919204411 3919204411>
 .691207 IP .35043 > .4321: R 1:1(0) ack 1 win 257 <nop,nop,timestamp 3919204440 3919204411>

RESET 数据包在三次握手完成后立即到达。但是，使用此选项有其危险：如果 RESET 到达时另一端的套接字缓冲区中还有未读数据，这些数据将被清除，从而导致数据丢失。强制发送 RESET 通常用于请求/响应式协议：请求的发送者在收到对其请求的完整响应后，就可以确定不会丢失任何数据，此时它可以安全地在连接上强制发送 RESET。

解决方案 2：

您有两个套接字 - 一个用于客户端，另一个用于服务器。现在您的客户端正在执行主动关闭。这意味着客户端已启动 TCP 的连接终止（客户端已发送 TCP FIN 段）。

在此阶段，您会看到客户端套接字处于 FIN_WAIT1 状态。那么服务器套接字现在处于什么状态？它处于 CLOSE_WAIT 状态。因此服务器套接字未关闭。

服务器的 FIN 尚未发送。（为什么 - 因为应用程序尚未关闭套接字）。在此阶段，您正在通过服务器套接字进行写入，因此您不会收到错误。

现在，如果您想查看错误，只需在通过套接字写入之前写入 close(client_fd)。

close(client_fd);
printf( &quot;Write result: %d
&quot;, write( client_fd, &quot;123&quot;, 3 ) );

这里服务器套接字不再处于 CLOSE_WAIT 状态，因此您可以看到 write 的返回值为 -ve，表示错误。希望这能解释清楚。

解决方案 3：

在客户端 close() 了套接字之后，像您示例中那样调用一次（第一次）write()，此后任何后续的 write() 调用都会得到预期的 EPIPE 和 SIGPIPE。

只需尝试添加另一个 write() 来引发错误：

...
printf( &quot;Errno before: %s
&quot;, strerror( errno ) );
printf( &quot;Write result: %d
&quot;, write( client_fd, &quot;123&quot;, 3 ) );
printf( &quot;Errno after:  %s
&quot;, strerror( errno ) );

printf( &quot;Errno before: %s
&quot;, strerror( errno ) );
printf( &quot;Write result: %d
&quot;, write( client_fd, &quot;A&quot;, 1 ) );
printf( &quot;Errno after:  %s
&quot;, strerror( errno ) );
...

输出将是：

Accepting
Server sleeping
Client closing its fd... Client exiting.
Errno before: Success
Write result: 3
Errno after:  Success
Errno before: Success
Client status is 0, server status is 13

由于第二次调用 write() 引发了 SIGPIPE，进程被终止，因此缺少最后两个 printf() 的输出。为了避免进程终止，您可以让进程忽略 SIGPIPE。