summary refs log tree commit diff
path: root/util/sbase/uniq.c
diff options
context:
space:
mode:
Diffstat (limited to 'util/sbase/uniq.c')
-rw-r--r-- util/sbase/uniq.c 144
1 files changed, 144 insertions, 0 deletions
diff --git a/util/sbase/uniq.c b/util/sbase/uniq.c
new file mode 100644
index 00000000..f1ad6a7b
--- /dev/null
+++ b/util/sbase/uniq.c
@@ -0,0 +1,144 @@
+/* See LICENSE file for copyright and license details. */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "text.h"
+#include "util.h"
+
+/* Option state; written by main() while parsing the command line. */
+static const char *countfmt = ""; /* format used to print the repeat count; empty means no count */
+static int dflag; /* -d: do not print lines that occurred exactly once */
+static int uflag; /* -u: do not print lines that occurred more than once */
+static int fskip; /* -f: leading fields excluded from the comparison */
+static int sskip; /* -s: bytes excluded from the comparison, after the fields */
+
+/* Pending-line state; owned by uniqline(). */
+static struct line prevl;      /* copy of the line waiting to be printed */
+static ssize_t prevoff = -1;   /* comparison offset into prevl; -1 when nothing is pending */
+static long prevlinecount;     /* how many times prevl has been seen in a row */
+
+/*
+ * Return the offset in l at which comparison should begin: past the
+ * first fskip fields (a field is an optional run of blanks followed by
+ * a run of non-blanks), then past up to sskip further bytes. The byte
+ * skip stops early at a newline, and the returned offset never exceeds
+ * l->len.
+ */
+static size_t
+uniqskip(struct line *l)
+{
+	size_t i = 0;
+	int f, s;
+
+	for (f = fskip; f && i < l->len; --f) {
+		/*
+		 * <ctype.h> functions require a value representable as
+		 * unsigned char; a plain char may be negative for bytes
+		 * >= 0x80. Both scans are bounded by l->len so they do
+		 * not depend on what follows the line in the buffer.
+		 */
+		while (i < l->len && isblank((unsigned char)l->data[i]))
+			i++;
+		while (i < l->len && !isblank((unsigned char)l->data[i]))
+			i++;
+	}
+	for (s = sskip; s && i < l->len && l->data[i] != '\n'; --s)
+		i++;
+
+	return i;
+}
+
+/*
+ * Feed one input line to the duplicate detector, or pass l == NULL to
+ * flush the pending line.
+ *
+ * When l equals the pending line (each compared from its uniqskip()
+ * offset to its end) only the repeat count is incremented. Otherwise
+ * the pending line, if any, is written to ofp -- subject to dflag and
+ * uflag, and preceded by its count when countfmt is non-empty -- and
+ * l, if given, becomes the new pending line with a count of 1.
+ */
+static void
+uniqline(FILE *ofp, struct line *l)
+{
+	static size_t prevcap; /* bytes currently allocated for prevl.data */
+	size_t loff = 0;
+
+	if (l) {
+		loff = uniqskip(l);
+
+		if (prevoff >= 0 && (l->len - loff) == (prevl.len - prevoff) &&
+		    !memcmp(l->data + loff, prevl.data + prevoff, l->len - loff)) {
+			++prevlinecount;
+			return;
+		}
+	}
+
+	if (prevoff >= 0) {
+		if ((prevlinecount == 1 && !dflag) ||
+		    (prevlinecount != 1 && !uflag)) {
+			if (*countfmt)
+				fprintf(ofp, countfmt, prevlinecount);
+			fwrite(prevl.data, 1, prevl.len, ofp);
+		}
+		prevoff = -1;
+	}
+
+	if (l) {
+		/*
+		 * Grow only when the new line does not fit the existing
+		 * allocation. Comparing against the previous line's
+		 * length instead would reallocate for every line that is
+		 * not shorter than its predecessor.
+		 */
+		if (!prevl.data || l->len > prevcap) {
+			prevl.data = erealloc(prevl.data, l->len);
+			prevcap = l->len;
+		}
+		prevl.len = l->len;
+		memcpy(prevl.data, l->data, prevl.len);
+		prevoff = loff;
+	}
+	prevlinecount = 1;
+}
+
+/*
+ * Read fp line by line with getline() and pass each line to uniqline(),
+ * which writes its output to ofp. The loop ends as soon as getline()
+ * returns a value that is not positive. The line buffer is static, so
+ * it is reused across calls.
+ */
+static void
+uniq(FILE *fp, FILE *ofp)
+{
+	static struct line cur;
+	static size_t cap;
+	ssize_t n;
+
+	for (;;) {
+		n = getline(&cur.data, &cap, fp);
+		if (n <= 0)
+			break;
+		cur.len = n;
+		uniqline(ofp, &cur);
+	}
+}
+
+/*
+ * Flush the line still pending in uniqline() to ofp. Call once after
+ * all input has been fed through uniq().
+ */
+static void
+uniqfinish(FILE *ofp)
+{
+	struct line *flush = NULL; /* NULL tells uniqline() there is no new line */
+
+	uniqline(ofp, flush);
+}
+
+/* Report the command-line synopsis, with the program name, via eprintf(). */
+static void
+usage(void)
+{
+	eprintf("usage: %s [-c] [-d | -u]"
+	        " [-f fields] [-s chars]"
+	        " [input [output]]\n",
+	        argv0);
+}
+
+/*
+ * Entry point: parse the options, open the optional input and output
+ * operands (an operand of "-" keeps the corresponding standard stream),
+ * filter the input through uniq()/uniqfinish() and return the combined
+ * fshut() status of both streams.
+ */
+int
+main(int argc, char *argv[])
+{
+	char *fname[2] = { "<stdin>", "<stdout>" };
+	FILE *fp[2] = { stdin, stdout };
+	int i, ret = 0;
+
+	ARGBEGIN {
+	case 'c':
+		countfmt = "%7ld ";
+		break;
+	case 'd':
+		dflag = 1;
+		break;
+	case 'u':
+		uflag = 1;
+		break;
+	case 'f':
+		fskip = estrtonum(EARGF(usage()), 0, INT_MAX);
+		break;
+	case 's':
+		sskip = estrtonum(EARGF(usage()), 0, INT_MAX);
+		break;
+	default:
+		usage();
+	} ARGEND
+
+	/* at most two operands: input, then output */
+	if (argc > 2)
+		usage();
+
+	for (i = 0; i < argc; i++) {
+		if (!strcmp(argv[i], "-"))
+			continue; /* keep the default stream for "-" */
+
+		fname[i] = argv[i];
+		/* operand 0 is opened for reading, operand 1 for writing */
+		fp[i] = fopen(argv[i], (i == 0) ? "r" : "w");
+		if (!fp[i])
+			eprintf("fopen %s:", argv[i]);
+	}
+
+	uniq(fp[0], fp[1]);
+	uniqfinish(fp[1]);
+
+	ret |= fshut(fp[0], fname[0]) | fshut(fp[1], fname[1]);
+
+	return ret;
+}