Subject: kern/757: VM subsystem is broken.
To: None <gnats-admin@NetBSD.ORG>
From: Tor Egge <tegge@dsl.unit.no>
List: netbsd-bugs
Date: 01/28/1995 10:20:03
>Number:         757
>Category:       kern
>Synopsis:       Active use of fork causes lockups.
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people (Kernel Bug People)
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sat Jan 28 10:20:02 1995
>Originator:     Tor Egge
>Organization:
Norwegian Institute of Technology, University of Trondheim
"	"
>Release:        1.0A
>Environment:
	EverCom 486 DX266, with vesa localbus. 
	16 MB ram. 1152 MB swap (9x128 MB)
	NetBSD 1.0A kernel, from 28 January.
	libc.so.12.0 
System: NetBSD ikke.idt.unit.no 1.0A NetBSD 1.0A (TEGGE) #24: Sat Jan 28 00:54:07 MET 1995     root@ikke.idt.unit.no:/usr/src/sys/arch/i386/compile/TEGGE i386

>Description:
	

        The machine freezes.

	A debug routine inserted into the kernel showed the following state
        during the lockup (these numbers may vary slightly from time to time):

	419 inactive non-shared objects (93 without references)
          0 active non-shared objects
        102 inactive shared objects
          0 active shared objects

       75632 K swap allocated.  
        3296 K real memory used.

	The amount of swap allocated is too high, since the test
	program uses less than 3 MB of memory, and only 11 instances exist
	at any given time.


	A perl script triggering the same fault caused over 320 MB of swap
	to be allocated. The output from 'pstat -s' less than 2 minutes before
	the freeze:
Device      512-blocks     Used    Avail Capacity  Type
/dev/wd0b       262080    73640   188440    28%    Interleaved
/dev/vnd0b      262144    73672   188472    28%    Interleaved
/dev/vnd1b      262144    73704   188440    28%    Interleaved
/dev/vnd2b      262144    73704   188440    28%    Interleaved
/dev/vnd3b      262144    73704   188440    28%    Interleaved
/dev/vnd4b      262144    73704   188440    28%    Interleaved
/dev/vnd5b      262144    73704   188440    28%    Interleaved
/dev/vnd6b      262144    73704   188440    28%    Interleaved
/dev/vnd7b      262144    69736   192408    27%    Interleaved
/dev/vnd8b           0  *** not available for swapping ***
/dev/vnd9b           0  *** not available for swapping ***
/dev/vnd10b          0  *** not available for swapping ***
/dev/vnd11b          0  *** not available for swapping ***
/dev/vnd12b          0  *** not available for swapping ***
/dev/vnd13b          0  *** not available for swapping ***
/dev/vnd14b          0  *** not available for swapping ***
/dev/vnd15b          0  *** not available for swapping ***
Total          2359232   659272  1699960    28%

	And the ps output:
  UID   PID  PPID CPU PRI NI   VSZ  RSS WCHAN  STAT TT       TIME COMMAND
    0     0     0   0 -18  0     0    0 schedu DLs  ??    0:00.01 (swapper)
    0     1     0   0  10  0   360   16 wait   Is   ??    0:00.06 /sbin/init
    0     2     0   1 -18  0     0   12 thrd_s DL   ??    1:16.41 (pagedaemon)
    0    19     1   3  10  0 30164  148 mfsidl Ss   ??    0:00.35 mfs -s 60000 
    0    46     1   0   2  0    56  204 select Is   ??    0:00.09 portmap
    0    49     1   0   2  0    72  212 select Ss   ??    0:00.75 ypbind
    0    53     1   0   2  0   268   16 select Is   ??    0:00.02 mountd
    0    55     1   0   2  0    84   16 netcon Is   ??    0:00.02 nfsd: master 
    0    57    55   0   2  0    72   16 nfsd   I    ??    0:00.01 nfsd: server 
    0    58    55   0   2  0    72   16 nfsd   I    ??    0:00.01 nfsd: server 
    0    59    55   0   2  0    72   16 nfsd   I    ??    0:00.00 nfsd: server 
    0    60    55   0   2  0    72   16 nfsd   I    ??    0:00.01 nfsd: server 
    0    64     1   2  10  0    60   16 nfsidl S    ??    0:01.60 nfsiod -n 4
    0    65     1   0  10  0    60   16 nfsidl S    ??    0:00.24 nfsiod -n 4
    0    66     1   0  10  0    60   16 nfsidl S    ??    0:00.04 nfsiod -n 4
    0    67     1   0  10  0    60   16 nfsidl I    ??    0:00.02 nfsiod -n 4
    0    70     1   0   2  0   196  288 select S    ??    0:00.58 amd -l syslog
    0    81     1   0   2  0    84  224 select Ss   ??    0:00.33 syslogd
    0    97     1   0  18  0    12  124 pause  Ss   ??    0:00.79 update
    0    99     1   0  18  0   224  236 pause  Is   ??    0:00.21 cron
    0   105     1   0   2  0    64  168 select Is   ??    0:00.04 lpd
    0   108     1   0   2  0   304  160 netcon Is   ??    0:00.05 sendmail: acc
    0   111     1   0   2  0    96  236 select Is   ??    0:00.21 inetd
    0   142     1   0   2  0   308  532 select Ss   ??    0:08.07 xterm -ls -n 
    0   491     1   0   2  0   308  592 select Is   ??    0:03.69 xterm -ls -n 
26850   143   142   0   3  0   664  392 ttyin  Is+  p0    0:02.90 -bash (bash)
26850   179   143  17  32  0  2752 2400 -      R    p0    1:31.98 /export/archi
26850   993   179   3  28  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1183   179   5  29  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1542   179   4  29  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1623   143   3  18  0   664  228 pause  S    p0    0:00.08 -bash (bash)
26850  1685   179   2  28  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1700   179   3  28  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1701   179   6  29  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1702   179   4  -5  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1711   179  13  31  0     0    0 -      Z    p0    0:00.00 (perl)
26850  1712   179   9   2  0  2752  496 select S    p0    0:00.13 /export/archi
26850  1713  1623   5  29  0   256  172 -      R    p0    0:00.05 ps axlg
26850  1714   179   1  32  0  2752   68 -      R    p0    0:00.01 (perl)
26850   499   491   1   3  0   660  376 ttyin  Is+  p1    0:01.08 -bash (bash)
    0   125     1 147  18 -12   232  268 pause  S<   v0-   0:00.91 /store/bin/xn
    0   134     1   0   3  0    44  180 ttyin  Is+  v0    0:00.06 /usr/libexec/


	This perl script was also limited to 11 instances (1 parent, 10 children), with less than 3 MB of memory used by each instance.

>How-To-Repeat:

On a machine with 16 MB of physical memory and more than 40 MB of swap, run this program:

#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/wait.h>
#include <errno.h>

/*
 * Per-pid flags, indexed directly by child pid: spawnchild() sets an entry
 * to 1 after a fork, waitchild() clears it when that child is reaped.
 */
char childspawned[64000];
/* Number of ints in eatmem. */
#define EATMEMSIZE 640000
/*
 * Buffer that the parent and every child overwrite in full after each
 * fork().  NOTE(review): presumably about 2.5 MB with a 4-byte int, in line
 * with the "less than 3 MB" figure in the report -- confirm.
 */
int eatmem[EATMEMSIZE];
/*
 * Children forked and not yet reaped; main() keeps forking while this is
 * below 10.  A forked child resets its own copy to 0.
 */
int childcount;
/* Latest value returned by fork() in spawnchild() or waitpid() in waitchild(). */
int child;
/* Not referenced anywhere in the visible program. */
int parent;
/* Set by a forked child to its own getpid() value. */
int pid;


/*
 * Fork one child and have both processes overwrite the whole eatmem array.
 *
 * Parent: marks the new pid in childspawned[], increments childcount and
 * fills eatmem with the child's pid.
 * Child: resets childcount, fills eatmem with EATMEMSIZE minus its own pid,
 * sleeps 5 seconds so that several children are alive at once, then
 * terminates without returning.
 *
 * The program is terminated with status 1 if fork() fails or if the new pid
 * cannot be used as an index into childspawned[].
 */
void spawnchild()
{
  int i;

  child=fork();
  if (child<0) {
    /* fork() failed; -1 must never be used as an index into childspawned[]. */
    perror("fork");
    exit(1);
  }

  if (child==0) {
    childcount=0;
    pid=getpid();
    for (i=0;i<EATMEMSIZE;i++)
      eatmem[i]=EATMEMSIZE-pid;
    sleep(5);
    /*
     * _exit() rather than exit(): exit() would flush stdio buffers inherited
     * from the parent, printing the parent's pending output a second time
     * when stdout is not line-buffered.
     */
    _exit(0);
  }

  /* childspawned[] is a char array, so sizeof gives its element count. */
  if ((size_t)child>=sizeof childspawned) {
    fprintf(stderr,"pid %d does not fit in childspawned[]\n",child);
    exit(1);
  }

  assert(!childspawned[child]);
  childspawned[child]=1;
  printf("Child %d forked\n",child);
  childcount++;
  for (i=0;i<EATMEMSIZE;i++)
    eatmem[i]=child;
}

/*
 * Reap every child that has already terminated, without blocking.
 *
 * Calls waitpid() with WNOHANG repeatedly.  For each reaped pid it prints a
 * line, checks that the pid was recorded in childspawned[], clears that
 * entry and decrements childcount.  Returns as soon as waitpid() reports
 * that no further child has exited (0) or fails with anything other than
 * EINTR.
 */
void waitchild()
{
  for (;;) {
    child=waitpid(-1,0,WNOHANG);
    /*
     * Retry when interrupted by a signal.  This has to restart the call
     * itself: a "continue" inside do { } while (child>0) would jump to the
     * loop condition and leave the loop instead of retrying.
     */
    if (child<0 && errno==EINTR)
      continue;
    if (child<=0)
      break;

    /* The pid is used as an array index below; make sure it is in range. */
    assert((size_t)child<sizeof childspawned);
    printf("Child %d returned, childspawned[%d]=%d\n",
	   child,child,childspawned[child]);
    assert(childspawned[child]);
    childspawned[child]=0;
    childcount--;
  }
}

/*
 * Driver loop; never returns.
 *
 * While fewer than 10 children are outstanding, fork another one.  Once the
 * count reaches 10, collect whichever children have finished and go round
 * again.  The command-line arguments are not used.
 */
int main(int argc,char **argv)
{
  for (;;) {
    if (childcount<10) {
      /* Pool not full yet: add one child and re-check the count. */
      spawnchild();
      continue;
    }
    /* Pool is full: reap any children that have exited so far. */
    waitchild();
  }
}

>Fix:
	Rewrite vm subsystem.
>Audit-Trail:
>Unformatted: