Subject: kern/17752: dead lock in sbxxxxx functions using local sockets
To: None <gnats-bugs@gnats.netbsd.org>
From: Christian Biere <christianbiere@gmx.de>
List: netbsd-bugs
Date: 07/28/2002 22:42:45
>Number: 17752
>Category: kern
>Synopsis: dead locks in sbxxxxx functions using local sockets
>Confidential: yes
>Severity: critical
>Priority: high
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Sun Jul 28 22:43:00 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator: Christian Biere
>Release: NetBSD 1.6D
>Organization:
>Environment:
System: NetBSD localhost 1.6D NetBSD 1.6D (DURON2) #0: Sat Jul 27 08:52:59 CEST
2002 root@localhost:/usr/src/sys/arch/i386/compile/DURON2 i386
Architecture: i386
Machine: i386
>Description:
I am working on a little program which uses AF_INET and AF_LOCAL sockets.
In short, a server reads from a file, e.g. /dev/zero, and clients can connect
to receive chunks of this file. The aim is a distributor for a (very good)
random device. EOF and unrecoverable errors are not handled very well, but
this isn't critical. The problem is that ice-client runs into a deadlock
in some sbxxxxx functions, namely sbcompress (and sbdrop in a slightly modified
version). As I am not the original author and I haven't checked whether I
may treat the source as "open", I have chosen to mark this report confidential.
BTW, I have stripped out a lot of code which is not necessary to reproduce this bug.
Therefore the program does not really make much sense.
Regards,
Christian Biere
ice.h:
#if !defined(ICE_H)
#define ICE_H
/* common includes */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <limits.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "config.h"
#define STRINGIFY(x) #x
#define XSTRINGIFY(x) STRINGIFY(x)
#define msg printf
/*
 * Switch a descriptor between blocking and non-blocking mode.
 *
 * fd:   descriptor whose file status flags are changed
 * mode: non-zero selects blocking I/O (O_NONBLOCK cleared),
 *       zero selects non-blocking I/O (O_NONBLOCK set)
 *
 * Returns the result of fcntl(F_SETFL), or -1 when the current flags
 * could not be read.
 */
int setblocking(int fd, int mode) {
    int flags;

    flags = fcntl(fd, F_GETFL);
    if (flags == -1) {
        msg("fcntl F_GETFL failed: %s",
            strerror(errno));
        /* never feed the -1 error marker back into F_SETFL as a flag set */
        return -1;
    }
    if (mode)
        return fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
    return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
/*
 * write() wrapper that hides EAGAIN and EINTR from the caller.
 *
 * The write is re-issued for as long as it fails with one of those two
 * errno values.  Any other failure is logged and reported to the caller.
 *
 * Returns whatever the last write() returned: the number of bytes
 * written (possibly fewer than len), or a negative value on error.
 */
ssize_t write_retry(int fd, const void *buf, size_t len) {
    ssize_t count;

    do {
        count = write(fd, buf, len);
        if (count >= 0)
            return count;
    } while (errno == EAGAIN || errno == EINTR);

    msg("write() failed: %s", strerror(errno));
    return count;
}
/*
 * read() wrapper that hides EAGAIN and EINTR from the caller.
 *
 * The read is re-issued for as long as it fails with one of those two
 * errno values.  Any other failure is logged and reported to the caller.
 *
 * Returns whatever the last read() returned: the number of bytes read
 * (0 at end of file, possibly fewer than len), or a negative value on
 * error.
 */
ssize_t read_retry(int fd, void *buf, size_t len) {
    ssize_t count;

    do {
        count = read(fd, buf, len);
        if (count >= 0)
            return count;
    } while (errno == EAGAIN || errno == EINTR);

    msg("read() failed: %s", strerror(errno));
    return count;
}
/*
 * do_write() doesn't return until all nbytes are written or
 * an unrecoverable error occurs.
 *
 * fd:     descriptor to write to
 * buf:    start of the data
 * nbytes: number of bytes to send
 *
 * Returns nbytes on success.  When write_retry() returns a value <= 0
 * that value is logged and handed back unchanged, even if part of the
 * data has already been written.
 */
ssize_t do_write(int fd, const void *buf, size_t nbytes)
{
    /* byte cursor: ISO C has no arithmetic on void pointers */
    const unsigned char *pos = buf;
    size_t nleft = nbytes;
    ssize_t res;

    while (nleft > 0) {
        res = write_retry(fd, pos, nleft);
        if (res <= 0) {
            msg("write_retry() failed: %s", strerror(errno));
            return res;
        }
        nleft -= res;
        pos += res;
    }
    return nbytes;
}
/*
 * do_read() doesn't return until all nbytes are read or
 * an unrecoverable error occurs.
 *
 * fd:     descriptor to read from
 * buf:    destination buffer, at least nbytes long
 * nbytes: number of bytes to collect
 *
 * Returns nbytes on success.  When read_retry() returns a value <= 0
 * (end of file or error) that value is logged and handed back unchanged,
 * even if part of the data has already been stored in buf.
 */
ssize_t do_read(int fd, void *buf, size_t nbytes)
{
    /* byte cursor: ISO C has no arithmetic on void pointers */
    unsigned char *pos = buf;
    size_t nleft = nbytes;
    ssize_t res = 0;

    while (nleft > 0) {
        res = read_retry(fd, pos, nleft);
        if (res <= 0) {
            msg("read_retry failed: %s", strerror(errno));
            return res;
        }
        nleft -= res;
        pos += res;
    }
    return nbytes;
}
#endif /* ICE_H */
-----------
iced.c:
#include "ice.h"
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <limits.h>
#include <fcntl.h>
#include <pwd.h>
/* Path of the null device; may be overridden at compile time.
 * Not referenced by the code shown in this report. */
#ifndef DEV_NULL
#define DEV_NULL "/dev/null"
#endif
/* Size of the entropy transfer buffers and upper bound accepted for -s. */
#define MAX_CHUNK_SIZE 1024
/* Largest number of entropy bytes handed out per request;
 * main() defaults it to 16 and the -s option overrides it. */
static unsigned chunk_size;
/* Set to 1 by the -v option. */
static int debug;
/* Not referenced by the code shown in this report. */
#define MAXFD 256
/* Backlog passed to listen(). */
#define LISTENQ 5
/*
 * Book-keeping record the parent keeps for every forked slave.
 * The records form a doubly linked list whose head is `slaveproc'.
 */
typedef struct slaveproc {
    pid_t pid;                /* process id returned by fork() */
    char peer[22];            /* 255.255.255.255/65535\0 */
    int fd;                   /* parent's end of the socket pair */
    int amount;               /* entropy bytes accounted to this slave */
    volatile int dead;        /* set by the SIGCHLD handler or when the
                                 slave closed its socket */
    int deaf;                 /* this is 1, when write() failed */
    int status;               /* initialised to 0 and printed on cleanup */
    struct slaveproc *next;   /* following record, NULL at the tail */
    struct slaveproc *prev;   /* preceding record, NULL at the head */
} Slaveproc;

/* Head of the slave list; empty until the first client is accepted. */
static Slaveproc *slaveproc = NULL;
/*
 * Print the command line synopsis to stderr and terminate the process.
 * Does not return.
 */
void usage(void) {
    fputs("Usage: iced [-d] [-e] [-f <facility>] [-l <logfile>] "
          "[-p <port>] [-s <n>] [-u <user>] [-v] [entropy source]\n", stderr);
    fputs("Options:\n", stderr);
    fputs(" -p : Listen on <port>. Default is "
          XSTRINGIFY(DEFAULT_PORT) ".\n", stderr);
    fputs(" -s : Write <n> bytes at once to clients. Default is 16.\n", stderr);
    fputs(" -v : Print debugging messages.\n\n", stderr);
    /* the trailing blank keeps "no" and "entropy" apart once the
     * adjacent literals are concatenated */
    fputs("The optional argument specifies the entropy source. If no "
          "entropy source is\nspecified, `"
          ENTROPY_SOURCE
          "' is used.\n", stderr);
    exit(EXIT_SUCCESS);
}
/*
 * Add SIGCHLD to, or remove it from, the process signal mask.
 * Used to protect operations on the slave list.
 *
 * block: non-zero blocks SIGCHLD, zero unblocks it.
 *
 * A failing sigprocmask() is reported with perror() and otherwise ignored.
 */
void block_sigchld(int block) {
    sigset_t chld_only;
    int blocking = (block != 0);

    sigemptyset(&chld_only);
    sigaddset(&chld_only, SIGCHLD);

    if (sigprocmask(blocking ? SIG_BLOCK : SIG_UNBLOCK,
                    &chld_only, (sigset_t *)NULL) < 0)
        perror(blocking ? "failed to block SIGCHLD"
                        : "failed to unblock SIGCHLD");
}
/*
 * SIGCHLD handler: reap every child that has terminated and flag its
 * list entry as dead so that the main loop can release it.
 *
 * signo: signal number (unused).
 */
void sig_sigchld(int signo) {
    int saved_errno = errno;    /* waitpid() must not disturb the errno
                                   seen by the interrupted code */
    pid_t pid;
    int stat;
    Slaveproc *proc;

    (void)signo;
    /*
     * Several children may have exited while only one SIGCHLD was
     * delivered, so keep reaping without blocking until none are left.
     */
    while ((pid = waitpid(-1, &stat, WNOHANG)) > 0) {
        /*
         * find child in list and mark it as dead
         *
         * IMPORTANT! block SIGCHLD while operating on the slaveproc list!
         */
        proc = slaveproc;
        while (proc != NULL && proc->pid != pid)
            proc = proc->next;
        if (proc != NULL)
            proc->dead = 1;
    }
    errno = saved_errno;
}
/*
 * Pass an I/O result through, terminating the process when it is -1
 * (used in case a client has closed the connection).
 *
 * res: return value of a read/write helper.
 *
 * Returns res unchanged for every value other than -1; for -1 the
 * process exits with EXIT_SUCCESS and the function does not return.
 */
ssize_t exit_on_error(ssize_t res) {
    if (res != -1)
        return res;
    exit(EXIT_SUCCESS);
}
/*
 * Per-client service loop, run in the forked child.
 *
 * in_fd:       child's end of the socket pair to the parent; entropy is
 *              requested by writing a byte count in network byte order
 *              and read back from the same descriptor
 * client:      connected client socket
 * client_addr: printable peer address, used in log messages only
 *
 * Each request consists of a command byte followed by an argument byte:
 *   0x01 (`read')  - one reply: a length byte followed by that many bytes
 *   0x02 (`readb') - replies of the same form until the requested count
 *                    has been delivered
 * The function never returns; it exits when the client closes the
 * connection, sends an unknown command, or a request cannot be read.
 */
void serve_entropy(int in_fd, int client, char *client_addr) {
    unsigned char buf[MAX_CHUNK_SIZE];
    ssize_t res;
    unsigned char cmd, arg = 0;
    u_long amount;
    ssize_t written;

    for (;;) {
        res = read_retry(client, &cmd, 1);
        if (res == 0) {
            exit(EXIT_SUCCESS);
        } else if (res != 1) {
            msg("failed to read next command from client %.21s: %s\n",
                client_addr, strerror(errno));
            /* cmd holds no valid command; acting on it would be guesswork */
            exit(EXIT_FAILURE);
        }
        switch (cmd) {
        case 0x01:
        case 0x02:
            res = read_retry(client, &arg, 1);
            if (res != 1) {
                msg("failed to read command argument from client %.21s: %s\n",
                    client_addr, strerror(errno));
                /* without its argument the request cannot be served */
                exit(EXIT_FAILURE);
            }
            switch (cmd) {
            case 0x01:
                msg("client %.21s sent `read %d'\n",
                    client_addr, (int)arg);
                arg = arg < chunk_size ? arg : chunk_size;
                amount = htonl((u_long)arg);
                res = exit_on_error(do_write(in_fd, &amount, sizeof(amount)));
                /* first reply byte carries the payload length */
                buf[0] = res = do_read(in_fd, buf + 1, arg);
                written = do_write(client, buf, res + 1);
                if (written != res + 1)
                    msg("failed to write entropy to client %.21s: %s\n",
                        client_addr, strerror(errno));
                break;
            case 0x02:
                msg("client %.21s sent `readb %u'\n",
                    client_addr, (unsigned)arg);
                /* full chunks first ... */
                while (arg >= chunk_size) {
                    amount = htonl((u_long)chunk_size);
                    exit_on_error(do_write(in_fd, &amount, sizeof(amount)));
                    buf[0] = res = exit_on_error(read_retry(in_fd, buf + 1, arg));
                    written = do_write(client, buf, res + 1);
                    if (written != res + 1)
                        msg("failed to write entropy to client %.21s: %s\n",
                            client_addr, strerror(errno));
                    arg -= res;
                }
                /* ... then whatever is left */
                if (arg > 0) {
                    amount = htonl((u_long)arg);
                    exit_on_error(do_write(in_fd, &amount, sizeof(amount)));
                    buf[0] = res = exit_on_error(read_retry(in_fd, buf + 1, arg));
                    written = do_write(client, buf, res + 1);
                    if (written != res + 1)
                        msg("failed to write entropy to client %.21s: %s\n",
                            client_addr, strerror(errno));
                }
                break;
            }
            break;
        default:
            msg("client %.21s sent invalid command\n", client_addr);
            exit(EXIT_SUCCESS);
            /* not reached */
        } /* switch */
    } /* for (;;) */
}
int main(int argc, char *argv[]) {
pid_t pid;
int listenfd, entropy_fd;
struct sockaddr_in servaddr;
char ch;
char *entropy_source = NULL;
int on = 1;
unsigned port;
debug = 0;
chunk_size = 16;
port = DEFAULT_PORT;
while ((ch = getopt(argc, argv, "def:l:p:s:u:v")) != -1)
switch (ch) {
case 'p':
port = atoi(optarg);
if (port == 0)
msg("invalid port identifier: %.32s\n", optarg);
break;
case 's':
chunk_size = atoi(optarg);
if (chunk_size <= 0 || chunk_size > MAX_CHUNK_SIZE)
msg("invalid block size; valid range is 1...%d\n",
MAX_CHUNK_SIZE);
break;
case 'v':
debug = 1;
break;
case '?':
default:
usage();
}
argc -= optind;
argv += optind;
if (port == 0 || port > 65535) /* don't even trust the default setting */
msg("invalid port identifier: %d\n", port);
if (argc == 1)
entropy_source = argv[0];
else if (argc == 0)
entropy_source = ENTROPY_SOURCE;
else
usage();
/*
* open entropy source
*/
msg("using %.128s as entropy source\n", entropy_source);
entropy_fd = open(entropy_source, O_RDONLY);
if (entropy_fd < 0)
perror("could not open entropy source");
/*
* obtain a server socket
*/
listenfd = socket(AF_INET, SOCK_STREAM, 0);
if (listenfd < 0)
perror("could not create socket");
if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR,
(char *)&on, sizeof(on)) < 0)
perror("could not set socket option");
bzero(&servaddr, sizeof(servaddr));
servaddr.sin_family = AF_INET;
servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
servaddr.sin_port = htons((u_short)port);
if (bind(listenfd, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0)
perror("could not bind to address");
if (listen(listenfd, LISTENQ) < 0)
perror("could not listen on socket");
/*
* set listenfd to non-blocking since we perform select() on it
* see: Stevens `Unix Network Programming' Vol.1 Sect. 15.6
*/
if (setblocking(listenfd, 0) == -1)
perror("failed to set listen socket to non-blocking");
signal(SIGCHLD, sig_sigchld);
signal(SIGPIPE, SIG_IGN);
for (;;) {
int connfd, maxfd;
socklen_t len;
struct sockaddr_in cliaddr;
int fd[2];
Slaveproc *proc, *min_amount_proc;
fd_set readset;
struct timeval timeout;
int res, min_amount;
timeout.tv_sec = 5;
timeout.tv_usec = 0;
FD_ZERO(&readset);
FD_SET(listenfd, &readset);
maxfd = listenfd;
if (maxfd < entropy_fd)
maxfd = entropy_fd;
proc = slaveproc;
while (proc != NULL) {
/*
* look for dead children
*/
if (proc->dead) {
Slaveproc *tmp_proc;
close(proc->fd);
msg("peer %.21s got %d bytes of entropy (status=%d)",
proc->peer, proc->amount, proc->status);
/*
* this list is used in the SIGCHLD signal handler (read only);
* block SIGCHLD while we delete an item!
*/
block_sigchld(1);
if (proc->next != NULL)
proc->next->prev = proc->prev;
if (proc->prev != NULL)
proc->prev->next = proc->next;
else
slaveproc = proc->next;
block_sigchld(0);
tmp_proc = proc;
proc = proc->next;
free(tmp_proc);
continue;
}
/*
* the last write to this child failed; probably the client
* has closed the connection, the child has terminated, we just
* do not have a SIGCHLD for it.
*/
if (proc->deaf)
continue;
FD_SET(proc->fd, &readset);
if (maxfd < proc->fd)
maxfd = proc->fd;
proc = proc->next;
}
#if 0
msg("maxfd = %d", maxfd);
#endif
res = select(maxfd + 1, &readset, NULL, NULL, &timeout);
if (res == 0) /* nothing to do */
continue;
if (res < 0) { /* check for error */
if (errno != EINTR)
perror("select() returned an error");
sleep(1);
continue;
}
if (FD_ISSET(listenfd, &readset)) {
char client_addr[256];
char buf[256];
len = sizeof(cliaddr);
connfd = accept(listenfd, (struct sockaddr *) &cliaddr, &len);
if (connfd < 0) {
if (errno == EINTR || errno == EAGAIN) /* no error */
continue;
perror("could not accept connection");
sleep(1);
continue;
}
sprintf(client_addr, "%.15s/%d",
inet_ntop(AF_INET, &(cliaddr.sin_addr), buf, sizeof(buf)),
ntohs(cliaddr.sin_port));
msg("connection from %.32s\n", client_addr);
if (setblocking(connfd, 1) == -1) {
perror("failed to set socket to blocking, closing connection");
close(connfd);
sleep(1);
continue;
}
/*
* create unix domain socket pair: parent <-> child
*/
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, fd) < 0) {
perror("could not create socket pair, closing connection");
close(connfd);
sleep(1);
continue;
}
/*
* chain a new child struct to the head of the doubly linked list
*/
proc = malloc(sizeof(Slaveproc));
if (proc == NULL) {
/*
* poor child, we can't afford it
*/
perror("failed to allocate memory");
sleep(1);
continue;
}
strncpy(proc->peer, client_addr, sizeof(proc->peer));
proc->peer[sizeof(proc->peer) - 1] = '\0';
proc->fd = fd[1];
proc->amount = 0;
proc->dead = 0;
proc->deaf = 0;
proc->status = 0;
proc->next = slaveproc;
proc->prev = NULL;
/*
* block SIGCHLD to squish a race condition (the child may die
* before the slaveproc list is updated
*/
block_sigchld(1);
if ((pid = fork()) == (pid_t)-1) {
perror("fork() failed, closing connection");
close(connfd);
free(proc);
block_sigchld(0);
sleep(1);
continue;
}
if (pid != 0) {
close(connfd);
close(fd[0]);
proc->pid = pid;
if (proc->next != NULL)
proc->next->prev = proc;
slaveproc = proc;
block_sigchld(0);
continue;
} else {
close(listenfd);
close(entropy_fd);
close(fd[1]);
block_sigchld(0);
signal(SIGCHLD, SIG_IGN);
serve_entropy(fd[0], connfd, client_addr);
close(connfd);
exit(EXIT_SUCCESS);
}
} /* if (FD_ISSET(listenfd, &readset)) */
/*
* check for children that are ready to read entropy.
* perform a simple scheduling:
* out of all children that are ready to read entropy select
* the one with the least amount of already consumed entropy
*/
for (proc = slaveproc, min_amount = INT_MAX, min_amount_proc = NULL;
proc != NULL;
proc = proc->next)
if (FD_ISSET(proc->fd, &readset) &&
proc->fd <= maxfd && proc->amount < min_amount) {
min_amount_proc = proc;
min_amount = proc->amount;
}
if (min_amount_proc != NULL) {
unsigned char buf[MAX_CHUNK_SIZE];
ssize_t in, out;
u_long amount;
/*
* check how much entropy the slave wants
*/
res = do_read(min_amount_proc->fd, &amount, sizeof(amount));
if (res == 0) {
/* slaved closed the connection */
min_amount_proc->dead = 1;
continue;
}
if (res != sizeof(amount))
perror("failed to read block size from slave");
amount = ntohl((u_long)amount);
msg("child %u requested %u bytes of entropy\n",
(unsigned)min_amount_proc->pid, (unsigned)amount);
if (amount > chunk_size) {
msg("child %u requested %u bytes, but chunk size is %u\n",
(unsigned)min_amount_proc->pid, (unsigned)amount, chunk_size);
amount = chunk_size;
}
/*
* get amount bytes of entropy
*/
res = do_read(entropy_fd, buf, amount);
if (res < 0)
perror("failed to read from entropy source");
msg("got %u bytes of entropy from source\n", in);
if (amount != in)
msg("wanted %u bytes of entropy, got %d bytes\n", (unsigned)amount, in);
out = write_retry(min_amount_proc->fd, buf, amount);
if (out < 0) {
if (errno == EPIPE)
min_amount_proc->deaf = 1;
else
perror("failed to write to child");
}
msg("wrote %d bytes of entropy to child\n", out);
min_amount_proc->amount += amount;
}
}
exit(EXIT_SUCCESS);
}
----------
ice-client.c:
#include "ice.h"
#include <netdb.h>
/*
 * Connect to an ice server over TCP.
 *
 * hostname: name or address handed to gethostbyname()
 * port:     TCP port in host byte order
 *
 * Every address returned for hostname is tried in turn, each with a
 * socket of its own.
 *
 * Returns the connected descriptor, or -1 if the host cannot be
 * resolved, a socket cannot be created, or no address accepts the
 * connection.
 */
int ice_connect(const char *hostname, unsigned port) {
    struct sockaddr_in servaddr;
    struct hostent *hptr;
    struct in_addr **pptr;
    int fd;

    if ((hptr = gethostbyname(hostname)) == NULL) {
        fprintf(stderr, "can't resolve host %s.\n", hostname);
        return -1;
    }
    for (pptr = (struct in_addr **) hptr->h_addr_list; *pptr != NULL; pptr++) {
        /* a descriptor that failed to connect is closed below,
         * so each attempt needs a fresh one */
        if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
            perror("socket() failed");
            return -1;
        }
        bzero(&servaddr, sizeof(servaddr));
        servaddr.sin_family = AF_INET;
        servaddr.sin_port = htons((unsigned short)port);
        memcpy(&servaddr.sin_addr, *pptr, sizeof(struct in_addr));
        fprintf(stderr, "Trying %s:%d...\n", inet_ntoa(servaddr.sin_addr),
                (int)port);
        if (connect(fd, (struct sockaddr*) &servaddr, sizeof(servaddr)) == 0)
            return fd; /* success */
        fprintf(stderr, "can't connect to `%s:%d': %s\n", hostname,
                (int)port, strerror(errno));
        close(fd);
    }
    return -1;
}
/*
 * Request loop of the test client.
 *
 * fd: connected server socket.
 *
 * Each round sends command 0x01 with a random one-byte count and then
 * reads a single byte from the server.  The loop ends, and the function
 * returns, as soon as one of the transfers fails or the server closes
 * the connection.
 */
void main_loop(int fd) {
    unsigned char cmd = 0x01, amount;

    for (;;) {
        amount = random() & 255;
        if (do_write(fd, &cmd, 1) != 1)
            break;
        if (do_write(fd, &amount, 1) != 1)
            break;
        /* only one byte of the reply is consumed per round */
        if (do_read(fd, &amount, 1) != 1)
            break;
    }
}
/*
 * ice-client entry point: ice-client HOSTNAME PORT
 *
 * Validates the port, connects to the server and runs the request loop.
 * Exits with EXIT_FAILURE when the port is not a number in 1...65535 or
 * the connection cannot be established.
 */
int main(int argc, const char *argv[]) {
    int fd;
    unsigned port;
    long value;
    char *end;

    if (argc < 3) {
        fprintf(stderr, "usage: %s HOSTNAME PORT\n", argv[0]);
        exit(EXIT_SUCCESS);
    }
    /* strtol() instead of atoi(): trailing junk and range errors are detectable */
    errno = 0;
    value = strtol(argv[2], &end, 10);
    if (errno != 0 || end == argv[2] || *end != '\0' ||
        value < 1 || value > 65535) {
        fprintf(stderr, "invalid port: %s\n", argv[2]);
        exit(EXIT_FAILURE);
    }
    port = (unsigned)value;
    if ((fd = ice_connect(argv[1], port)) == -1)
        exit(EXIT_FAILURE);
    main_loop(fd);
    exit(EXIT_SUCCESS);
}
----------------
config.h:
/* TCP port iced listens on unless -p is given. */
#define DEFAULT_PORT 12345
/* Entropy source iced opens when none is named on the command line. */
#define ENTROPY_SOURCE "/dev/urandom"
-----------------
Makefile:
# Compiler used for both programs.
CC = gcc
# Warning, optimisation and debug-info switches.
DEBUG = -Wall -O -g
# $(DEBUG) is expanded after "-g -O2", so -g is passed twice and both -O2
# and -O reach the compiler (NOTE(review): with gcc the last -O option is
# presumably the effective one - confirm which level is intended).
# HAVE_CONFIG_H is not tested anywhere in the sources shown in this report.
CFLAGS = -g -O2 $(DEBUG) -DHAVE_CONFIG_H
# No extra linker flags or libraries.
LDFLAGS =
LIBS =
# Default target: build the server and the test client.
all: iced ice-client
# Server; rebuilt when iced.c or the shared header changes.
iced: iced.c ice.h
$(CC) $(CFLAGS) $(LDFLAGS) -o iced iced.c $(LIBS)
# Test client; rebuilt when ice-client.c or the shared header changes.
ice-client: ice-client.c ice.h
$(CC) $(CFLAGS) $(LDFLAGS) -o ice-client ice-client.c $(LIBS)
# Remove objects, both binaries and any core dump.
clean:
rm -f *.o iced ice-client core
------------------
>How-To-Repeat:
Compile the included source, start the server and a few clients e.g. 6.
After a few seconds the system should be locked up. Escape to DDB and see
what "tr" says.
$ make
$ ./iced -s 512 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
>Fix:
>Release-Note:
>Audit-Trail:
>Unformatted: