Subject: Re: split into n chunks
To: None <tech-userlevel@netbsd.org>
From: Jan Schaumann <jschauma@netmeister.org>
List: tech-userlevel
Date: 05/29/2007 21:20:46
--IDYEmSnFhs3mNXr+
Content-Type: multipart/mixed; boundary="JgQwtEuHJzHdouWu"
Content-Disposition: inline
--JgQwtEuHJzHdouWu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable
Jachym Holecek <freza@NetBSD.org> wrote:
=20
> # Jan Schaumann 2007-05-28:
> > I've had the need to split a file into N chunks. Now I could do the
> > math myself by looking at the file size and then specifying "-b" with the
> > appropriate number, but I figured it might be useful to let split(1) do
> > this for me.
> >=20
> > The attached diff adds the "-n chunk_count" flag to split(1).
=20
> Considering
>
> st_size = 100
> chunks = 6
>
> yields
>
> bcnt = 100/6 = 16
> bcnt = 16 + 100%6 = 16 + 4 = 20
>
> thus five files would be created -- not what the user asked for.
Ah, yes. Here's another patch.
-Jan
--=20
Ancient Principle of WYGIWYGAINGW:
What You Get Is What You're Given, And It's No Good Whining.
--Terry Pratchett et al. (The Science of Discworld)
--JgQwtEuHJzHdouWu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff
Content-Transfer-Encoding: quoted-printable
Index: split.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/usr.bin/split/split.c,v
retrieving revision 1.21
diff -b -u -r1.21 split.c
--- split.c 5 Jan 2004 23:23:37 -0000 1.21
+++ split.c 30 May 2007 04:18:48 -0000
@@ -62,8 +62,9 @@
=20
int main(int, char **);
static void newfile(void);
-static void split1(off_t);
+static void split1(off_t, int);
static void split2(off_t);
+static void split3(off_t);
static void usage(void) __attribute__((__noreturn__));
static size_t bigwrite(int, void const *, size_t);
=20
@@ -75,8 +76,9 @@
char const *base;
off_t bytecnt =3D 0; /* Byte count to split on. */
off_t numlines =3D 0; /* Line count to split on. */
+ off_t chunks =3D 0; /* Number of chunks to split into. */
=20
- while ((ch =3D getopt(argc, argv, "0123456789b:l:a:")) !=3D -1)
+ while ((ch =3D getopt(argc, argv, "0123456789b:l:a:n:")) !=3D -1)
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@@ -119,6 +121,12 @@
*ep !=3D '\0')
errx(1, "%s: illegal suffix length.", optarg);
break;
+ case 'n': /* Chunks. */
+ if (!isdigit((unsigned char)optarg[0]) ||
+ (chunks =3D (size_t)strtoul(optarg, &ep, 10)) =3D=3D 0 ||
+ *ep !=3D '\0')
+ errx(1, "%s: illegal number of chunks.", optarg);
+ break;
default:
usage();
}
@@ -143,11 +151,16 @@
=20
if (numlines =3D=3D 0)
numlines =3D DEFLINE;
- else if (bytecnt)
+ else if (bytecnt || chunks)
+ usage();
+
+ if (bytecnt && chunks)
usage();
=20
if (bytecnt)
- split1(bytecnt);
+ split1(bytecnt, 0);
+ else if (chunks)
+ split3(chunks);
else
split2(numlines);
=20
@@ -159,12 +172,15 @@
* Split the input by bytes.
*/
static void
-split1(off_t bytecnt)
+split1(off_t bytecnt, int maxcnt)
{
off_t bcnt;
ssize_t dist, len;
char *C;
char bfr[MAXBSIZE];
+ int nfiles;
+
+ nfiles =3D 0;
=20
for (bcnt =3D 0;;)
switch (len =3D read(ifd, bfr, MAXBSIZE)) {
@@ -176,9 +192,12 @@
/* NOTREACHED */
default:
if (!file_open) {
+ if (!maxcnt || (nfiles < maxcnt)) {
newfile();
+ nfiles++;
file_open =3D 1;
}
+ }
if (bcnt + len >=3D bytecnt) {
/* LINTED: bytecnt - bcnt <=3D len */
dist =3D bytecnt - bcnt;
@@ -188,14 +207,20 @@
for (C =3D bfr + dist; len >=3D bytecnt;
/* LINTED: bytecnt <=3D len */
len -=3D bytecnt, C +=3D bytecnt) {
+ if (!maxcnt || (nfiles < maxcnt)) {
newfile();
+ nfiles++;
+ }
/* LINTED: as above */
if (bigwrite(ofd,
C, bytecnt) !=3D bytecnt)
err(1, "write");
}
if (len) {
+ if (!maxcnt || (nfiles < maxcnt)) {
newfile();
+ nfiles++;
+ }
/* LINTED: len >=3D 0 */
if (bigwrite(ofd, C, len) !=3D len)
err(1, "write");
@@ -258,6 +283,29 @@
}
=20
/*
+ * split3 --
+ * Split the input into specified number of chunks
+ */
+static void
+split3(off_t chunks)
+{
+ struct stat sb;
+
+ if (fstat(ifd, &sb) =3D=3D -1) {
+ err(1, "stat");
+ /* NOTREACHED */
+ }
+
+ if (chunks > sb.st_size) {
+ errx(1, "can't split into more than %d files",
+ (int)sb.st_size);
+ /* NOTREACHED */
+ }
+
+ split1(sb.st_size/chunks, chunks);
+}
+
+/*
* newfile --
* Open a new output file.
*/
@@ -308,7 +356,7 @@
usage(void)
{
(void)fprintf(stderr,
-"usage: %s [-b byte_count] [-l line_count] [-a suffix_length] "
+"usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_len=
gth] "
"[file [prefix]]\n", getprogname());
exit(1);
}
--JgQwtEuHJzHdouWu--
--IDYEmSnFhs3mNXr+
Content-Type: application/pgp-signature
Content-Disposition: inline
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (NetBSD)
iD8DBQFGXPuefFtkr68iakwRAt6ZAKDXxNeprN6DQ7ywSWF4J1x4pVB1OwCeInIL
GU6JYj/Prr3jyC3Lp5DKebk=
=wjuN
-----END PGP SIGNATURE-----
--IDYEmSnFhs3mNXr+--