From e9a910b33c7837b4b868e3abda18eb4810df7f02 Mon Sep 17 00:00:00 2001 From: Leah Rowe Date: Sat, 4 Oct 2025 09:14:33 +0100 Subject: config/git: import suckless sbase i currently use the output of sha512sum in several places of xbmk, which is a bit unreliable in case output changes. other cases where i use util outputs in variables are probably reliable, because i'm using mostly posix utilities in those. to mitigate this, i now import suckless sbase, which has a reasonable sha512sum implementation. *every* binary it builds is being placed in build.list, because i'll probably start using more of them. for example, i may start modifying the "date" implementation, adding the GNU-specific options that i need as mentioned on init.sh i'm importing it in util/ because the sha512sum util is needed for verifying project sources, so if sbase itself is a "project source", that means we can run into a chicken and egg bootstrapping problem. this is sbase at revision: 055cc1ae1b3a13c3d8f25af0a4a3316590efcd48 Signed-off-by: Leah Rowe --- util/sbase/libutf/mkrunetype.awk | 240 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 util/sbase/libutf/mkrunetype.awk (limited to 'util/sbase/libutf/mkrunetype.awk') diff --git a/util/sbase/libutf/mkrunetype.awk b/util/sbase/libutf/mkrunetype.awk new file mode 100644 index 00000000..e01ea2cc --- /dev/null +++ b/util/sbase/libutf/mkrunetype.awk @@ -0,0 +1,240 @@ +# See LICENSE file for copyright and license details. 
# Generates the rune classification sources (is*rune.c, upperrune.c and
# lowerrune.c) in the current directory from ';'-separated input records.
# Fields used: $1 code point (hex), $3 category, $5 class, $13/$14 case
# mappings.
# NOTE(review): this field layout looks like UnicodeData.txt -- confirm
# against the build rule that feeds this script.

BEGIN {
	FS = ";"
	# set up hexadecimal lookup table
	for (i = 0; i < 16; i++)
		hex[sprintf("%X", i)] = i;
	# The generated lookup functions call bsearch(), so the files written
	# by mkis() must include <stdlib.h>.
	HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
	HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
}

# Collect the code points of every class, in input order.  For upper and
# lower case letters also record the case mapping, falling back to the
# code point itself when the mapping field is empty.
$3 ~ /^L/ { alphav[alphac++] = $1; }
($3 ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
$3 == "Cc" { cntrlv[cntrlc++] = $1; }
$3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
$3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
$3 == "Lt" { titlev[titlec++] = $1; }
$3 == "Nd" { digitv[digitc++] = $1; }

END {
	system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");

	# q is never assigned: it serves as the empty "no case mapping" array.
	mkis("alpha", alphav, alphac, "isalpharune.c", q, "");
	mkis("space", spacev, spacec, "isspacerune.c", q, "");
	mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", q, "");
	mkis("upper", upperv, upperc, "upperrune.c", tolowerv, "lower");
	mkis("lower", lowerv, lowerc, "lowerrune.c", toupperv, "upper");
	mkis("title", titlev, titlec, "istitlerune.c", q, "");
	mkis("digit", digitv, digitc, "isdigitrune.c", q, "");

	system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");

	otheris();
}

# parse hexadecimal rune index to int
#   s: string of upper-case hexadecimal digits
# Returns the numeric value; characters missing from the hex[] table
# count as 0.  x, i and c are locals.
function code(s,    x, i, c) {
	x = 0;
	for (i = 1; i <= length(s); i++) {
		c = substr(s, i, 1);
		x = (x * 16) + hex[c];
	}
	return x;
}

# generate 'isrune' unicode lookup function
#   name:     class name; the emitted function is is<name>rune()
#   runev:    array of hex code point strings, indexed 0..runec-1
#   runec:    number of entries in runev
#   file:     output file name
#   casev:    case mapping for each entry of runev, or an empty array
#   casename: when casev is non-empty, to<casename>rune() is emitted too
# Everything after casename is a local variable.
#
# The runes are grouped into four kinds of tables:
#   1  singletons
#   2  ranges  (consecutive code points; with a case mapping the mapped
#              values must be consecutive as well)
#   3  laces 1 (every other code point; mapped value is code point + 1)
#   4  laces 2 (every other code point; mapped value is code point - 1)
function mkis(name, runev, runec, file, casev, casename,
              j, mode, kind, hascase, cur, nxt, ccur, cnxt,
              rune1c, rune2c, rune3c, rune4c,
              rune1v, case1v, rune2v0, rune2v1, case2v,
              rune3v0, rune3v1, rune4v0, rune4v1) {
	rune1c = 0;
	rune2c = 0;
	rune3c = 0;
	rune4c = 0;
	mode = 1;	# kind of the group currently open (1: none)
	hascase = (length(casev) > 0);

	#sort rune groups into singletons, ranges and laces
	for (j = 0; j < runec; j++) {
		# Classify the step from runev[j] to its successor.  The
		# bound is checked first so nothing past the end is read;
		# the last rune always terminates the open group.
		kind = 1;
		if (j + 1 < runec) {
			cur = code(runev[j]);
			nxt = code(runev[j + 1]);
			if (hascase) {
				ccur = code(casev[j]);
				cnxt = code(casev[j + 1]);
			}
			if (nxt == cur + 1 && (!hascase || cnxt == ccur + 1))
				kind = 2;
			else if (nxt == cur + 2 && (!hascase || (cnxt == nxt + 1 && ccur == cur + 1)))
				kind = 3;
			else if (nxt == cur + 2 && (!hascase || (cnxt == nxt - 1 && ccur == cur - 1)))
				kind = 4;
		}

		# still inside the group that is already open
		if (kind != 1 && kind == mode)
			continue;

		# Close the open group at this rune, always indexing with
		# that group's own counter; with no open group a terminating
		# rune is a singleton.
		if (mode == 2) {
			rune2v1[rune2c++] = runev[j];
		} else if (mode == 3) {
			rune3v1[rune3c++] = runev[j];
		} else if (mode == 4) {
			rune4v1[rune4c++] = runev[j];
		} else if (kind == 1) {
			rune1v[rune1c] = runev[j];
			if (hascase)
				case1v[rune1c] = casev[j];
			rune1c++;
		}

		# Open the next group; it starts at this same rune.
		if (kind == 2) {
			rune2v0[rune2c] = runev[j];
			if (hascase)
				case2v[rune2c] = casev[j];
		} else if (kind == 3) {
			rune3v0[rune3c] = runev[j];
		} else if (kind == 4) {
			rune4v0[rune4c] = runev[j];
		}
		mode = kind;
	}
	print HEADER > file;

	#generate list of laces 1
	if (rune3c > 0) {
		print "static const Rune "name"3[][2] = {" > file;
		for (j = 0; j < rune3c; j++) {
			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
		}
		print "};\n" > file;
	}

	#generate list of laces 2
	if (rune4c > 0) {
		print "static const Rune "name"4[][2] = {" > file;
		for (j = 0; j < rune4c; j++) {
			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
		}
		print "};\n" > file;
	}

	# generate list of ranges
	if (rune2c > 0) {
		if (hascase) {
			# third column: mapped value of the first rune in the range
			print "static const Rune "name"2[][3] = {" > file;
			for (j = 0; j < rune2c; j++) {
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
			}
		} else {
			print "static const Rune "name"2[][2] = {" > file;
			for (j = 0; j < rune2c; j++) {
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
			}
		}
		print "};\n" > file;
	}

	# generate list of singletons
	if (rune1c > 0) {
		if (hascase) {
			print "static const Rune "name"1[][2] = {" > file;
			for (j = 0; j < rune1c; j++) {
				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
			}
		} else {
			print "static const Rune "name"1[] = {" > file;
			for (j = 0; j < rune1c; j++) {
				print "\t0x"rune1v[j]"," > file;
			}
		}
		print "};\n" > file;
	}

	# generate lookup function
	print "int\nis"name"rune(Rune r)\n{" > file;
	if (rune4c > 0 || rune3c > 0)
		print "\tconst Rune *match;\n" > file;
	if (rune4c > 0) {
		print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
		print "\t\treturn !((r - match[0]) % 2);" > file;
	}
	if (rune3c > 0) {
		print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
		print "\t\treturn !((r - match[0]) % 2);" > file;
	}
	if (rune2c > 0) {
		print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
	}
	if (rune1c > 0) {
		print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
	}
	print "\treturn 0;\n}" > file;

	# generate case conversion function
	if (hascase) {
		print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
		if (rune4c > 0) {
			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
		}
		if (rune3c > 0) {
			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
		}
		if (rune2c > 0) {
			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn match[2] + (r - match[0]);" > file;
		}
		if (rune1c > 0) {
			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn match[1];" > file;
		}
		print "\treturn r;\n}" > file;
	}

	# flush and release the output file before the next one is opened
	close(file);
}

# Write the classification functions that are defined in terms of the
# generated ones (or of fixed ASCII ranges) instead of lookup tables.
function otheris() {
	print HEADER_OTHER > "isalnumrune.c";
	print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";
	print HEADER_OTHER > "isblankrune.c";
	print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";
	print HEADER_OTHER > "isprintrune.c";
	print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
	print "\t ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";
	print HEADER_OTHER > "isgraphrune.c";
	print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";
	print HEADER_OTHER > "ispunctrune.c";
	print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";
	print HEADER_OTHER > "isxdigitrune.c";
	# hexadecimal digits are 0-9, a-f and A-F
	print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6) || (r >= 'A' && (r - 'A') < 6);\n}" > "isxdigitrune.c";
}