From 32429f2c371a88befb2952fa326a979c4c4453ec Mon Sep 17 00:00:00 2001 From: Leah Rowe Date: Fri, 6 Mar 2026 13:02:24 +0000 Subject: util/nvmutil: remove the swap() function directly handle swapping in word and set_word in my testing, x86_64 and arm64 compilers actually produce more efficient code this way. i previously only did a big swap on the whole buffer on big-endian CPUs, and directly accessed without swaps on little-endian, as an optimisation. however, the old code is actually slower than what the compiler produces, with the new code! portability is retained with big-endian host CPUs and little-endian host CPUs. this also avoids the complication of memcpy and is just generally extremely reliable by comparison. Signed-off-by: Leah Rowe --- util/nvmutil/nvmutil.c | 71 ++++++++++++++------------------------------------ 1 file changed, 19 insertions(+), 52 deletions(-) diff --git a/util/nvmutil/nvmutil.c b/util/nvmutil/nvmutil.c index 577efc76..bda82d96 100644 --- a/util/nvmutil/nvmutil.c +++ b/util/nvmutil/nvmutil.c @@ -43,10 +43,10 @@ static void cmd_swap(void); static int good_checksum(int); static uint16_t word(int, int); static void set_word(int, int, uint16_t); +static int get_word_pos(int, int); static void check_bound(int, int); static void write_gbe(void); static void write_gbe_part(int); -static void swap(int); static void usage(void); static void err(int, const char *, ...); static const char *getnvmprogname(void); @@ -296,8 +296,6 @@ read_gbe_part(int p, int invert) { read_file_PERFECTLY_or_die(fd, buf + (SIZE_4KB * (p ^ invert)), SIZE_4KB, ((off_t)p) * partsize, fname, "pread"); - - swap(p ^ invert); } static void @@ -611,33 +609,40 @@ good_checksum(int partnum) return 0; } -/* - * NOTE: memcpy is a bit sticky with host endianness, - * but we currently use it only when swap has - * been handled. just be careful about when the - * swap() function is called. - */ - static uint16_t word(int pos16, int p) { - uint16_t rval = 0; + int pos; check_bound(pos16, p); - memcpy(&rval, buf + (SIZE_4KB * p) + (pos16 << 1), sizeof(uint16_t)); + pos = get_word_pos(pos16, p); - return rval; + return (uint16_t)buf[pos] | ((uint16_t)buf[pos + 1] << 8); } static void set_word(int pos16, int p, uint16_t val16) { + int pos; + check_bound(pos16, p); - memcpy(buf + (SIZE_4KB * p) + (pos16 << 1), &val16, sizeof(uint16_t)); + pos = get_word_pos(pos16, p); + + buf[pos++] = (uint8_t)(val16 & 0xff); + buf[pos] = (uint8_t)(val16 >> 8); part_modified[p] = 1; } +static int +get_word_pos(int pos16, int p) +{ + int off = SIZE_4KB * p; + int pos = (pos16 << 1) + off; + + return pos; +} + static void check_bound(int c, int p) { @@ -678,8 +683,6 @@ write_gbe(void) static void write_gbe_part(int p) { - swap(p); /* swap bytes on big-endian host CPUs */ - if (pwrite(fd, buf + (SIZE_4KB * p), SIZE_4KB, (off_t)p * partsize) != (ssize_t)SIZE_4KB) { err(ECANCELED, @@ -687,42 +690,6 @@ write_gbe_part(int p) } } -/* - * GbE files store bytes in little-endian order. - * This function ensures big-endian host CPU support. - */ -static void -swap(int partnum) -{ - /* - * NVM_SIZE assumed as the limit; see notes in - * check_bound(). - * - * TODO: - * This should be adjusted in the future, if - * we ever wish to work on the extended area. - */ - - size_t w; - size_t x; - - uint8_t *n = buf + (SIZE_4KB * partnum); - - int e = 1; - if (*((uint8_t *)&e) == 1) - return; /* Little-endian host CPU; no swap needed. */ - - /* - * The host CPU stores bytes in big-endian order. - * We will therefore reverse the order in memory: - */ - for (w = 0, x = 1; w < NVM_SIZE; w += 2, x += 2) { - uint8_t chg = n[w]; - n[w] = n[x]; - n[x] = chg; - } -} - static void usage(void) { -- cgit v1.2.1