From e9a910b33c7837b4b868e3abda18eb4810df7f02 Mon Sep 17 00:00:00 2001 From: Leah Rowe Date: Sat, 4 Oct 2025 09:14:33 +0100 Subject: config/git: import suckless sbase i currently use the output of sha512sum in several places of xbmk, which is a bit unreliable in case output changes. other cases where i use util outputs in variables are probably reliable, because i'm using mostly posix utilities in those. to mitigate this, i now import suckless sbase, which has a reasonable sha512sum implementation. *every* binary it builds is being placed in build.list, because i'll probably start using more of them. for example, i may start modifying the "date" implementation, adding the GNU-specific options that i need as mentioned on init.sh i'm importing it in util/ because the sha512sum util is needed for verifying project sources, so if sbase itself is a "project source", that means we can into a chicken and egg bootstrapping problem. this is sbase at revision: 055cc1ae1b3a13c3d8f25af0a4a3316590efcd48 Signed-off-by: Leah Rowe --- util/sbase/join.c | 529 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 529 insertions(+) create mode 100644 util/sbase/join.c (limited to 'util/sbase/join.c') diff --git a/util/sbase/join.c b/util/sbase/join.c new file mode 100644 index 00000000..d3e23439 --- /dev/null +++ b/util/sbase/join.c @@ -0,0 +1,529 @@ +/* See LICENSE file for copyright and license details. */ +#include +#include +#include +#include +#include + +#include "text.h" +#include "utf.h" +#include "util.h" + +enum { + INIT = 1, + GROW = 2, +}; + +enum { + EXPAND = 0, + RESET = 1, +}; + +enum { FIELD_ERROR = -2, }; + +struct field { + char *s; + size_t len; +}; + +struct jline { + struct line text; + size_t nf; + size_t maxf; + struct field *fields; +}; + +struct spec { + size_t fileno; + size_t fldno; +}; + +struct outlist { + size_t ns; + size_t maxs; + struct spec **specs; +}; + +struct span { + size_t nl; + size_t maxl; + struct jline **lines; +}; + +static char *sep = NULL; +static char *replace = NULL; +static const char defaultofs = ' '; +static const int jfield = 1; /* POSIX default join field */ +static int unpairsa = 0, unpairsb = 0; +static int oflag = 0; +static int pairs = 1; +static size_t seplen; +static struct outlist output; + +static void +usage(void) +{ + eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] " + "[-a | -v fileno] [-t delim] file1 file2\n", argv0); +} + +static void +prfield(struct field *fp) +{ + if (fwrite(fp->s, 1, fp->len, stdout) != fp->len) + eprintf("fwrite:"); +} + +static void +prsep(void) +{ + if (sep) + fwrite(sep, 1, seplen, stdout); + else + putchar(defaultofs); +} + +static void +swaplines(struct jline *la, struct jline *lb) +{ + struct jline tmp; + + tmp = *la; + *la = *lb; + *lb = tmp; +} + +static void +prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb) +{ + struct spec *sp; + struct field *joinfield; + size_t i; + + if (jfa >= la->nf || jfb >= lb->nf) + return; + + joinfield = &la->fields[jfa]; + + if (oflag) { + for (i = 0; i < output.ns; i++) { + sp = output.specs[i]; + + if (sp->fileno == 1) { + if (sp->fldno < la->nf) + prfield(&la->fields[sp->fldno]); + else if (replace) + fputs(replace, stdout); + } else if (sp->fileno == 2) { + if (sp->fldno < lb->nf) + prfield(&lb->fields[sp->fldno]); + else if (replace) + fputs(replace, stdout); + } else if (sp->fileno == 0) { + prfield(joinfield); + } + + if (i < output.ns - 1) + prsep(); + } + } else { + prfield(joinfield); + prsep(); + + for (i = 0; i < la->nf; i++) { + if (i != jfa) { + prfield(&la->fields[i]); + prsep(); + } + } + for (i = 0; i < lb->nf; i++) { + if (i != jfb) { + prfield(&lb->fields[i]); + if (i < lb->nf - 1) + prsep(); + } + } + } + putchar('\n'); +} + +static void +prline(struct jline *lp) +{ + if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.len) + eprintf("fwrite:"); + putchar('\n'); +} + +static int +jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb) +{ + int status; + + /* return FIELD_ERROR if both lines are short */ + if (jfa >= la->nf) { + status = (jfb >= lb->nf) ? FIELD_ERROR : -1; + } else if (jfb >= lb->nf) { + status = 1; + } else { + status = memcmp(la->fields[jfa].s, lb->fields[jfb].s, + MAX(la->fields[jfa].len, lb->fields[jfb].len)); + LIMIT(status, -1, 1); + } + + return status; +} + +static void +addfield(struct jline *lp, char *sp, size_t len) +{ + if (lp->nf >= lp->maxf) { + lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf), + sizeof(struct field)); + lp->maxf *= GROW; + } + lp->fields[lp->nf].s = sp; + lp->fields[lp->nf].len = len; + lp->nf++; +} + +static void +prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb) +{ + size_t i, j; + + for (i = 0; i < (spa->nl - 1); i++) + for (j = 0; j < (spb->nl - 1); j++) + prjoin(spa->lines[i], spb->lines[j], jfa, jfb); +} + +static struct jline * +makeline(char *s, size_t len) +{ + struct jline *lp; + char *tmp; + size_t i, end; + + if (s[len - 1] == '\n') + s[--len] = '\0'; + + lp = ereallocarray(NULL, INIT, sizeof(struct jline)); + lp->text.data = s; + lp->text.len = len; + lp->fields = ereallocarray(NULL, INIT, sizeof(struct field)); + lp->nf = 0; + lp->maxf = INIT; + + for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++) + ; + while (i < lp->text.len) { + if (sep) { + if ((lp->text.len - i) < seplen || + !(tmp = memmem(lp->text.data + i, + lp->text.len - i, sep, seplen))) { + goto eol; + } + end = tmp - lp->text.data; + addfield(lp, lp->text.data + i, end - i); + i = end + seplen; + } else { + for (end = i; !(isblank(lp->text.data[end])); end++) { + if (end + 1 == lp->text.len) + goto eol; + } + addfield(lp, lp->text.data + i, end - i); + for (i = end; isblank(lp->text.data[i]); i++) + ; + } + } +eol: + addfield(lp, lp->text.data + i, lp->text.len - i); + + return lp; +} + +static int +addtospan(struct span *sp, FILE *fp, int reset) +{ + char *newl = NULL; + ssize_t len; + size_t size = 0; + + if ((len = getline(&newl, &size, fp)) < 0) { + if (ferror(fp)) + eprintf("getline:"); + else + return 0; + } + + if (reset) + sp->nl = 0; + + if (sp->nl >= sp->maxl) { + sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl), + sizeof(struct jline *)); + sp->maxl *= GROW; + } + + sp->lines[sp->nl] = makeline(newl, len); + sp->nl++; + return 1; +} + +static void +initspan(struct span *sp) +{ + sp->nl = 0; + sp->maxl = INIT; + sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *)); +} + +static void +freespan(struct span *sp) +{ + size_t i; + + for (i = 0; i < sp->nl; i++) { + free(sp->lines[i]->fields); + free(sp->lines[i]->text.data); + } + free(sp->lines); +} + +static void +initolist(struct outlist *olp) +{ + olp->ns = 0; + olp->maxs = 1; + olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *)); +} + +static void +addspec(struct outlist *olp, struct spec *sp) +{ + if (olp->ns >= olp->maxs) { + olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs), + sizeof(struct spec *)); + olp->maxs *= GROW; + } + olp->specs[olp->ns] = sp; + olp->ns++; +} + +static struct spec * +makespec(char *s) +{ + struct spec *sp; + int fileno; + size_t fldno; + + if (!strcmp(s, "0")) { /* join field must be 0 and nothing else */ + fileno = 0; + fldno = 0; + } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') { + fileno = s[0] - '0'; + fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - 1; + } else { + eprintf("%s: invalid format\n", s); + } + + sp = ereallocarray(NULL, INIT, sizeof(struct spec)); + sp->fileno = fileno; + sp->fldno = fldno; + return sp; +} + +static void +makeolist(struct outlist *olp, char *s) +{ + char *item, *sp; + sp = s; + + while (sp) { + item = sp; + sp = strpbrk(sp, ", \t"); + if (sp) + *sp++ = '\0'; + addspec(olp, makespec(item)); + } +} + +static void +freespecs(struct outlist *olp) +{ + size_t i; + + for (i = 0; i < olp->ns; i++) + free(olp->specs[i]); +} + +static void +join(FILE *fa, FILE *fb, size_t jfa, size_t jfb) +{ + struct span spa, spb; + int cmp, eofa, eofb; + + initspan(&spa); + initspan(&spb); + cmp = eofa = eofb = 0; + + addtospan(&spa, fa, RESET); + addtospan(&spb, fb, RESET); + + while (spa.nl && spb.nl) { + if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) { + if (unpairsa) + prline(spa.lines[0]); + if (!addtospan(&spa, fa, RESET)) { + if (unpairsb) { /* a is EOF'd; print the rest of b */ + do + prline(spb.lines[0]); + while (addtospan(&spb, fb, RESET)); + } + eofa = eofb = 1; + } else { + continue; + } + } else if (cmp > 0) { + if (unpairsb) + prline(spb.lines[0]); + if (!addtospan(&spb, fb, RESET)) { + if (unpairsa) { /* b is EOF'd; print the rest of a */ + do + prline(spa.lines[0]); + while (addtospan(&spa, fa, RESET)); + } + eofa = eofb = 1; + } else { + continue; + } + } else if (cmp == 0) { + /* read all consecutive matching lines from a */ + do { + if (!addtospan(&spa, fa, EXPAND)) { + eofa = 1; + spa.nl++; + break; + } + } while (jlinecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0); + + /* read all consecutive matching lines from b */ + do { + if (!addtospan(&spb, fb, EXPAND)) { + eofb = 1; + spb.nl++; + break; + } + } while (jlinecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0); + + if (pairs) + prspanjoin(&spa, &spb, jfa, jfb); + + } else { /* FIELD_ERROR: both lines lacked join fields */ + if (unpairsa) + prline(spa.lines[0]); + if (unpairsb) + prline(spb.lines[0]); + eofa = addtospan(&spa, fa, RESET) ? 0 : 1; + eofb = addtospan(&spb, fb, RESET) ? 0 : 1; + if (!eofa && !eofb) + continue; + } + + if (eofa) { + spa.nl = 0; + } else { + swaplines(spa.lines[0], spa.lines[spa.nl - 1]); /* ugly */ + spa.nl = 1; + } + + if (eofb) { + spb.nl = 0; + } else { + swaplines(spb.lines[0], spb.lines[spb.nl - 1]); /* ugly */ + spb.nl = 1; + } + } + freespan(&spa); + freespan(&spb); +} + + +int +main(int argc, char *argv[]) +{ + size_t jf[2] = { jfield, jfield, }; + FILE *fp[2]; + int ret = 0, n; + char *fno; + + ARGBEGIN { + case '1': + jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX)); + break; + case '2': + jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX)); + break; + case 'a': + fno = EARGF(usage()); + if (strcmp(fno, "1") == 0) + unpairsa = 1; + else if (strcmp(fno, "2") == 0) + unpairsb = 1; + else + usage(); + break; + case 'e': + replace = EARGF(usage()); + break; + case 'o': + oflag = 1; + initolist(&output); + makeolist(&output, EARGF(usage())); + break; + case 't': + sep = EARGF(usage()); + break; + case 'v': + pairs = 0; + fno = EARGF(usage()); + if (strcmp(fno, "1") == 0) + unpairsa = 1; + else if (strcmp(fno, "2") == 0) + unpairsb = 1; + else + usage(); + break; + default: + usage(); + } ARGEND + + if (sep) + seplen = unescape(sep); + + if (argc != 2) + usage(); + + for (n = 0; n < 2; n++) { + if (!strcmp(argv[n], "-")) { + argv[n] = ""; + fp[n] = stdin; + } else if (!(fp[n] = fopen(argv[n], "r"))) { + eprintf("fopen %s:", argv[n]); + } + } + + jf[0]--; + jf[1]--; + + join(fp[0], fp[1], jf[0], jf[1]); + + if (oflag) + freespecs(&output); + + if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv[1])) | + fshut(stdout, "")) + ret = 2; + + return ret; +} -- cgit v1.2.1