summaryrefslogtreecommitdiff
path: root/util/nvmutil
diff options
context:
space:
mode:
Diffstat (limited to 'util/nvmutil')
-rw-r--r--util/nvmutil/nvmutil.c917
1 files changed, 683 insertions, 234 deletions
diff --git a/util/nvmutil/nvmutil.c b/util/nvmutil/nvmutil.c
index 6264537f..fe8364f7 100644
--- a/util/nvmutil/nvmutil.c
+++ b/util/nvmutil/nvmutil.c
@@ -15,6 +15,72 @@
* -Os -Wall -Wextra -Werror -pedantic -std=c90
*/
+#define OFF_ERR 0
+#ifndef OFF_RESET
+#define OFF_RESET 1
+#endif
+
+/*
+ * NOTE: older Linux lacked arc4random.
+ * added in glibc 2.36. Just pass HAVE_ARC4RANDOM_BUF=0
+ * at build time if you need old Linux / other libc.
+ */
+#if defined(__OpenBSD__) || defined(__FreeBSD__) || \
+ defined(__NetBSD__) || defined(__APPLE__) || \
+ defined(__linux__)
+#ifndef HAVE_ARC4RANDOM_BUF
+#define HAVE_ARC4RANDOM_BUF 1
+#endif
+#endif
+
+/*
+ * I/O config (build-time)
+ *
+ * Regarding:
+ * Retries on zero-return.
+ *
+ * 5 retries is generous,
+ * but also conservative.
+ * This is enough for e.g.
+ * slow USB flash drives,
+ * busy NFS servers, etc.
+ * Any more is too much
+ * and not of much benefit.
+ *
+ * 3-5 will tolerate buggy
+ * USB drives for example,
+ * but won't spin as long
+ * on really buggy and slow
+ * networks e.g. slow NFS.
+ *
+ * At least 3-5 recommended.
+ * Pass this at build time.
+ */
+#ifndef MAX_ZERO_RW_RETRY
+#define MAX_ZERO_RW_RETRY 5
+#endif
+/*
+ * 0: portable pread/pwrite
+ * 1: real pread/pwrite (thread-safe)
+ * Pass this at build-time
+ */
+#ifndef HAVE_REAL_PREAD_PWRITE
+#define HAVE_REAL_PREAD_PWRITE 0
+#endif
+/*
+ * Configure whether to wait on
+ * EINTR on files, or EAGAIN on
+ * cmd cat (stdout).
+ *
+ * Pass these at build time.
+ */
+#ifndef LOOP_EAGAIN
+#define LOOP_EAGAIN 1
+#endif
+#ifndef LOOP_EINTR
+#define LOOP_EINTR 1
+#endif
+
/*
* Major TODO: split this into multiple files.
* This program has become quite large now, mostly
@@ -157,32 +223,48 @@ also consider:
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
-typedef unsigned char u8;
+typedef unsigned char u8;
typedef unsigned short ushort;
-typedef unsigned int uint;
+typedef unsigned int uint;
+typedef unsigned long ulong;
/* type asserts */
typedef char static_assert_char_is_8_bits[(CHAR_BIT == 8) ? 1 : -1];
typedef char static_assert_char_is_1[(sizeof(char) == 1) ? 1 : -1];
-typedef char static_assert_uint8_is_1[(sizeof(u8) == 1) ? 1 : -1];
-typedef char static_assert_uint16_is_2[(sizeof(ushort) >= 2) ? 1 : -1];
+typedef char static_assert_u8_is_1[
+ (sizeof(u8) == 1) ? 1 : -1];
+typedef char static_assert_ushort_is_2[
+ (sizeof(ushort) >= 2) ? 1 : -1];
typedef char static_assert_short_is_2[(sizeof(short) >= 2) ? 1 : -1];
-typedef char static_assert_uint32_is_4[(sizeof(uint) >= 4) ? 1 : -1];
+typedef char static_assert_uint_is_4[
+ (sizeof(uint) >= 4) ? 1 : -1];
+typedef char static_assert_ulong_is_4[
+ (sizeof(ulong) >= 4) ? 1 : -1];
typedef char static_assert_int_ge_32[(sizeof(int) >= 4) ? 1 : -1];
typedef char static_assert_twos_complement[
((-1 & 3) == 3) ? 1 : -1
];
+typedef char assert_ulong_ptr[
+ (sizeof(ulong) >= sizeof(void *)) ? 1 : -1
+];
+typedef char assert_size_t_ptr[
+ (sizeof(size_t) >= sizeof(void *)) ? 1 : -1
+];
/*
* We set _FILE_OFFSET_BITS 64, but we only handle
* files that are 128KB in size at a maximum, so we
* realistically only need 32-bit at a minimum.
+ *
+ * We set 64 anyway, because there's no reason not
+ * to, but some systems may ignore _FILE_OFFSET_BITS
*/
typedef char static_assert_off_t_is_32[(sizeof(off_t) >= 4) ? 1 : -1];
@@ -238,12 +320,7 @@ static int xstrxcmp(const char *a, const char *b, size_t maxlen);
/*
* Prep files for reading
- *
- * Portability: /dev/urandom used
- * on Linux / old Unix, whereas
- * arc4random is used on BSD/MacOS.
*/
-static void open_dev_urandom(void);
static void open_gbe_file(void);
static void lock_gbe_file(void);
static void xopen(int *fd, const char *path, int flags, struct stat *st);
@@ -277,8 +354,10 @@ static void set_mac_nib(size_t mac_str_pos,
size_t mac_byte_pos, size_t mac_nib_pos);
static ushort hextonum(char ch_s);
static ushort rhex(void);
-static ushort fallback_rand(void);
-static unsigned long entropy_jitter(void);
+#if !defined(HAVE_ARC4RANDOM_BUF) || \
+ (HAVE_ARC4RANDOM_BUF) < 1
+static ulong entropy_jitter(void);
+#endif
static void write_mac_part(size_t partnum);
/*
@@ -323,6 +402,8 @@ static void check_bin(size_t a, const char *a_name);
*/
static void rw_gbe_file_part(size_t p, int rw_type,
const char *rw_type_str);
+static void check_written_part(size_t p);
+static void report_io_err_rw(void);
static u8 *gbe_mem_offset(size_t part, const char *f_op);
static off_t gbe_file_offset(size_t part, const char *f_op);
static off_t gbe_x_offset(size_t part, const char *f_op,
@@ -330,24 +411,29 @@ static off_t gbe_x_offset(size_t part, const char *f_op,
static ssize_t rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type);
static ssize_t rw_file_exact(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, int loop_eagain, int loop_eintr);
-static ssize_t rw_file_once(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, size_t rc, int loop_eagain,
- int loop_eintr);
+ off_t off, int rw_type, int loop_eagain, int loop_eintr,
+ size_t max_retries, int off_reset);
static ssize_t prw(int fd, void *mem, size_t nrw,
- off_t off, int rw_type, int loop_eagain, int loop_eintr);
-static int rw_over_nrw(ssize_t r, size_t nrw);
+ off_t off, int rw_type, int loop_eagain, int loop_eintr,
+ int off_reset);
+static int io_args(int fd, void *mem, size_t nrw,
+ off_t off, int rw_type);
+static int check_file(int fd, struct stat *st);
+static ssize_t rw_over_nrw(ssize_t r, size_t nrw);
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
static off_t lseek_loop(int fd, off_t off,
int whence, int loop_eagain, int loop_eintr);
+#endif
static int try_err(int loop_err, int errval);
/*
* Error handling and cleanup
*/
static void err(int nvm_errval, const char *msg, ...);
-static void close_files(void);
+static int close_files(void);
static const char *getnvmprogname(void);
-static void usage(u8 usage_exit);
+static void usage(int usage_exit);
/*
* Sizes in bytes:
@@ -377,18 +463,12 @@ static void usage(u8 usage_exit);
#define NVM_WORDS (NVM_SIZE >> 1)
#define NVM_CHECKSUM_WORD (NVM_WORDS - 1)
-#define NUM_RANDOM_BYTES 12
-static u8 rnum[NUM_RANDOM_BYTES];
-
/*
* Portable macro based on BSD nitems.
* Used to count the number of commands (see below).
*/
#define items(x) (sizeof((x)) / sizeof((x)[0]))
-static const char newrandom[] = "/dev/urandom";
-static const char *rname = NULL;
-
/*
* GbE files can be 8KB, 16KB or 128KB,
* but we only need the two 4KB parts
@@ -398,13 +478,13 @@ static const char *rname = NULL;
*
* The code will handle this properly.
*/
-static u8 buf[GBE_FILE_SIZE];
-static u8 pad[GBE_PART_SIZE]; /* the file that wouldn't die */
+static u8 real_buf[GBE_FILE_SIZE];
+static u8 pad[GBE_FILE_SIZE]; /* the file that wouldn't die */
+static u8 *buf = real_buf;
static ushort mac_buf[3];
static off_t gbe_file_size;
-static int urandom_fd = -1;
static int gbe_fd = -1;
static size_t part;
static u8 part_modified[2];
@@ -432,6 +512,9 @@ static const char *argv0;
#define ARGC_3 3
#define ARGC_4 4
+#define NO_LOOP_EAGAIN 0
+#define NO_LOOP_EINTR 0
+
enum {
IO_READ,
IO_WRITE,
@@ -566,8 +649,6 @@ typedef char assert_read[(IO_READ==0)?1:-1];
typedef char assert_write[(IO_WRITE==1)?1:-1];
typedef char assert_pread[(IO_PREAD==2)?1:-1];
typedef char assert_pwrite[(IO_PWRITE==3)?1:-1];
-typedef char assert_rand_byte[(NUM_RANDOM_BYTES>0)?1:-1];
-typedef char assert_rand_len[(NUM_RANDOM_BYTES<NVM_SIZE)?1:-1];
/* commands */
typedef char assert_cmd_dump[(CMD_DUMP==0)?1:-1];
typedef char assert_cmd_setmac[(CMD_SETMAC==1)?1:-1];
@@ -591,8 +672,27 @@ typedef char bool_skip_checksum_write[(SKIP_CHECKSUM_WRITE==0)?1:-1];
typedef char bool_checksum_write[(CHECKSUM_WRITE==1)?1:-1];
typedef char bool_no_invert[(NO_INVERT==0)?1:-1];
typedef char bool_part_invert[(PART_INVERT==1)?1:-1];
-
-static int use_prng = 0;
+typedef char bool_loop_eintr[(LOOP_EINTR==1||LOOP_EINTR==0)?1:-1];
+typedef char bool_loop_eagain[(LOOP_EAGAIN==1||LOOP_EAGAIN==0)?1:-1];
+typedef char bool_no_loop_eintr[(NO_LOOP_EINTR==0)?1:-1];
+typedef char bool_no_loop_eagain[(NO_LOOP_EAGAIN==0)?1:-1];
+typedef char bool_off_err[(OFF_ERR==0)?1:-1];
+typedef char bool_off_reset[(OFF_RESET==0||OFF_RESET==1)?1:-1];
+
+static int io_err_gbe = 0;
+static int rw_check_err_read[] = {0, 0};
+static int rw_check_partial_read[] = {0, 0};
+static int rw_check_bad_part[] = {0, 0};
+
+static int post_rw_checksum[] = {0, 0};
+
+static dev_t gbe_dev;
+static ino_t gbe_ino;
+
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
+void arc4random_buf(void *buf, size_t n);
+#endif
int
main(int argc, char *argv[])
@@ -605,12 +705,12 @@ main(int argc, char *argv[])
#ifdef NVMUTIL_PLEDGE
#ifdef NVMUTIL_UNVEIL
- if (pledge("stdio rpath wpath unveil", NULL) == -1)
+ if (pledge("stdio flock rpath wpath unveil", NULL) == -1)
err(errno, "pledge");
- if (unveil("/dev/urandom", "r") == -1)
- err(errno, "unveil /dev/urandom");
+ if (unveil("/dev/null", "r") == -1)
+ err(errno, "unveil /dev/null");
#else
- if (pledge("stdio rpath wpath", NULL) == -1)
+ if (pledge("stdio flock rpath wpath", NULL) == -1)
err(errno, "pledge");
#endif
#endif
@@ -627,25 +727,28 @@ main(int argc, char *argv[])
err(errno, "%s: unveil ro", fname);
if (unveil(NULL, NULL) == -1)
err(errno, "unveil block (ro)");
- if (pledge("stdio rpath", NULL) == -1)
+ if (pledge("stdio flock rpath", NULL) == -1)
err(errno, "pledge ro (kill unveil)");
} else {
if (unveil(fname, "rw") == -1)
err(errno, "%s: unveil rw", fname);
if (unveil(NULL, NULL) == -1)
err(errno, "unveil block (rw)");
- if (pledge("stdio rpath wpath", NULL) == -1)
+ if (pledge("stdio flock rpath wpath", NULL) == -1)
err(errno, "pledge rw (kill unveil)");
}
#else
if (command[cmd_index].flags == O_RDONLY) {
- if (pledge("stdio rpath", NULL) == -1)
+ if (pledge("stdio flock rpath", NULL) == -1)
err(errno, "pledge ro");
}
#endif
#endif
- open_dev_urandom();
+#if !defined(HAVE_ARC4RANDOM_BUF) || \
+ (HAVE_ARC4RANDOM_BUF) < 1
+ srand((uint)(time(NULL) ^ getpid()));
+#endif
open_gbe_file();
lock_gbe_file();
@@ -665,10 +768,29 @@ main(int argc, char *argv[])
run_cmd(cmd_index);
- if (command[cmd_index].flags == O_RDWR)
+ if (command[cmd_index].flags == O_RDWR) {
+
write_gbe_file();
- close_files();
+ /*
+ * We may otherwise read from
+ * cache, so we must sync.
+ */
+ if (fsync(gbe_fd) == -1)
+ err(errno, "%s: fsync (pre-verification)",
+ fname);
+
+ check_written_part(0);
+ check_written_part(1);
+
+ report_io_err_rw();
+
+ if (io_err_gbe)
+ err(EIO, "%s: bad write", fname);
+ }
+
+ if (close_files() == -1)
+ err(EIO, "%s: close", fname);
return EXIT_SUCCESS;
}
@@ -698,19 +820,19 @@ sanitize_command_index(size_t c)
if (command[c].argc < 3)
err(EINVAL, "cmd index %lu: argc below 3, %d",
- (unsigned long)c, command[c].argc);
+ (ulong)c, command[c].argc);
if (command[c].str == NULL)
err(EINVAL, "cmd index %lu: NULL str",
- (unsigned long)c);
+ (ulong)c);
if (*command[c].str == '\0')
err(EINVAL, "cmd index %lu: empty str",
- (unsigned long)c);
+ (ulong)c);
if (xstrxlen(command[c].str, MAX_CMD_LEN + 1) >
MAX_CMD_LEN) {
err(EINVAL, "cmd index %lu: str too long: %s",
- (unsigned long)c, command[c].str);
+ (ulong)c, command[c].str);
}
mod_type = command[c].set_modified;
@@ -738,12 +860,12 @@ sanitize_command_index(size_t c)
break;
default:
err(EINVAL, "Unsupported rw_size: %lu",
- (unsigned long)gbe_rw_size);
+ (ulong)gbe_rw_size);
}
if (gbe_rw_size > GBE_PART_SIZE)
err(EINVAL, "rw_size larger than GbE part: %lu",
- (unsigned long)gbe_rw_size);
+ (ulong)gbe_rw_size);
if (command[c].flags != O_RDONLY &&
command[c].flags != O_RDWR)
@@ -796,13 +918,13 @@ set_cmd_args(int argc, char *argv[])
static size_t
conv_argv_part_num(const char *part_str)
{
- unsigned char ch;
+ u8 ch;
if (part_str[0] == '\0' || part_str[1] != '\0')
err(EINVAL, "Partnum string '%s' wrong length", part_str);
/* char signedness is implementation-defined */
- ch = (unsigned char)part_str[0];
+ ch = (u8)part_str[0];
if (ch < '0' || ch > '1')
err(EINVAL, "Bad part number (%c)", ch);
@@ -825,11 +947,17 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
err(EINVAL, "Empty string in xstrxcmp");
for (i = 0; i < maxlen; i++) {
- if (a[i] != b[i])
- return (unsigned char)a[i] - (unsigned char)b[i];
+ u8 ac = (u8)a[i];
+ u8 bc = (u8)b[i];
+
+ if (ac == '\0' || bc == '\0') {
+ if (ac == bc)
+ return 0;
+ return ac - bc;
+ }
- if (a[i] == '\0')
- return 0;
+ if (ac != bc)
+ return ac - bc;
}
/*
@@ -845,26 +973,39 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
}
static void
-open_dev_urandom(void)
-{
- rname = newrandom;
- urandom_fd = open(rname, O_RDONLY);
- if (urandom_fd != -1)
- return;
-
- /* fallback on VERY VERY VERY old unix */
- use_prng = 1;
- srand((unsigned)(time(NULL) ^ getpid()));
-}
-
-static void
open_gbe_file(void)
{
struct stat gbe_st;
+ int flags;
xopen(&gbe_fd, fname,
command[cmd_index].flags | O_BINARY | O_NOFOLLOW, &gbe_st);
+ /* inode will be checked later on write */
+ gbe_dev = gbe_st.st_dev;
+ gbe_ino = gbe_st.st_ino;
+
+ if (gbe_st.st_nlink > 1)
+ fprintf(stderr,
+ "%s: warning: file has %lu hard links\n",
+ fname, (ulong)gbe_st.st_nlink);
+
+ if (gbe_st.st_nlink == 0)
+ err(EIO, "%s: file unlinked while open", fname);
+
+ flags = fcntl(gbe_fd, F_GETFL);
+ if (flags == -1)
+ err(errno, "%s: fcntl(F_GETFL)", fname);
+
+	/*
+	 * O_APPEND must not be used, because this
+	 * allows POSIX write() to ignore the
+	 * current write offset and write at EOF,
+	 * which would therefore break pread/pwrite.
+	 *
+	 * The message names the offending file; the
+	 * "%s" conversion needs its fname argument.
+	 */
+	if (flags & O_APPEND)
+		err(EIO, "%s: O_APPEND flag", fname);
+
gbe_file_size = gbe_st.st_size;
switch (gbe_file_size) {
@@ -906,6 +1047,9 @@ xopen(int *fd_ptr, const char *path, int flags, struct stat *st)
if (!S_ISREG(st->st_mode))
err(errno, "%s: not a regular file", path);
+
+ if (lseek(*fd_ptr, 0, SEEK_CUR) == (off_t)-1)
+ err(errno, "%s: file not seekable", path);
}
static void
@@ -972,7 +1116,7 @@ read_checksums(void)
if (num_invalid >= max_invalid) {
if (max_invalid == 1)
err(ECANCELED, "%s: part %lu has a bad checksum",
- fname, (unsigned long)part);
+ fname, (ulong)part);
err(ECANCELED, "%s: No valid checksum found in file",
fname);
}
@@ -1003,7 +1147,7 @@ check_command_num(size_t c)
{
if (!valid_command(c))
err(EINVAL, "Invalid run_cmd arg: %lu",
- (unsigned long)c);
+ (ulong)c);
}
static u8
@@ -1014,7 +1158,7 @@ valid_command(size_t c)
if (c != command[c].chk)
err(EINVAL, "Invalid cmd chk value (%lu) vs arg: %lu",
- (unsigned long)command[c].chk, (unsigned long)c);
+ (ulong)command[c].chk, (ulong)c);
return 1;
}
@@ -1128,14 +1272,14 @@ set_mac_nib(size_t mac_str_pos,
static ushort
hextonum(char ch_s)
{
- unsigned char ch = (unsigned char)ch_s;
+ u8 ch = (u8)ch_s;
- if ((unsigned)(ch - '0') <= 9)
+ if ((uint)(ch - '0') <= 9)
return ch - '0';
ch |= 0x20;
- if ((unsigned)(ch - 'a') <= 5)
+ if ((uint)(ch - 'a') <= 5)
return ch - 'a' + 10;
if (ch == '?' || ch == 'x')
@@ -1144,36 +1288,35 @@ hextonum(char ch_s)
return 16; /* invalid character */
}
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
static ushort
rhex(void)
{
+ static u8 num[12];
static size_t n = 0;
- if (use_prng)
- return fallback_rand();
-
if (!n) {
- n = sizeof(rnum);
- if (rw_file_exact(urandom_fd, rnum, n, 0, IO_READ, 0, 1) == -1)
- err(errno, "Randomisation failed");
+ n = 12;
+ arc4random_buf(num, 12);
}
- return (ushort)(rnum[--n] & 0xf);
+ return num[--n] & 0xf;
}
-
+#else
static ushort
-fallback_rand(void)
+rhex(void)
{
struct timeval tv;
- unsigned long mix;
- static unsigned long counter = 0;
+ ulong mix;
+ static ulong counter = 0;
gettimeofday(&tv, NULL);
- mix = (unsigned long)tv.tv_sec
- ^ (unsigned long)tv.tv_usec
- ^ (unsigned long)getpid()
- ^ (unsigned long)&mix
+ mix = (ulong)tv.tv_sec
+ ^ (ulong)tv.tv_usec
+ ^ (ulong)getpid()
+ ^ (ulong)&mix
^ counter++
^ entropy_jitter();
@@ -1181,18 +1324,18 @@ fallback_rand(void)
* Stack addresses can vary between
* calls, thus increasing entropy.
*/
- mix ^= (unsigned long)&mix;
- mix ^= (unsigned long)&tv;
- mix ^= (unsigned long)&counter;
+ mix ^= (ulong)&mix;
+ mix ^= (ulong)&tv;
+ mix ^= (ulong)&counter;
return (ushort)(mix & 0xf);
}
-static unsigned long
+static ulong
entropy_jitter(void)
{
struct timeval a, b;
- unsigned long mix = 0;
+ ulong mix = 0;
long mix_diff;
int i;
@@ -1209,12 +1352,13 @@ entropy_jitter(void)
if (mix_diff < 0)
mix_diff = -mix_diff;
- mix ^= (unsigned long)(mix_diff);
- mix ^= (unsigned long)&mix;
+ mix ^= (ulong)(mix_diff);
+ mix ^= (ulong)&mix;
}
return mix;
}
+#endif
static void
write_mac_part(size_t partnum)
@@ -1229,7 +1373,7 @@ write_mac_part(size_t partnum)
set_nvm_word(w, partnum, mac_buf[w]);
printf("Wrote MAC address to part %lu: ",
- (unsigned long)partnum);
+ (ulong)partnum);
print_mac_from_nvm(partnum);
}
@@ -1246,11 +1390,11 @@ cmd_helper_dump(void)
fprintf(stderr,
"BAD checksum %04x in part %lu (expected %04x)\n",
nvm_word(NVM_CHECKSUM_WORD, partnum),
- (unsigned long)partnum,
+ (ulong)partnum,
calculated_checksum(partnum));
printf("MAC (part %lu): ",
- (unsigned long)partnum);
+ (ulong)partnum);
print_mac_from_nvm(partnum);
hexdump(partnum);
}
@@ -1265,8 +1409,8 @@ print_mac_from_nvm(size_t partnum)
for (c = 0; c < 3; c++) {
val16 = nvm_word(c, partnum);
printf("%02x:%02x",
- (unsigned int)(val16 & 0xff),
- (unsigned int)(val16 >> 8));
+ (uint)(val16 & 0xff),
+ (uint)(val16 >> 8));
if (c == 2)
printf("\n");
else
@@ -1282,14 +1426,14 @@ hexdump(size_t partnum)
ushort val16;
for (row = 0; row < 8; row++) {
- printf("%08lx ", (unsigned long)((size_t)row << 4));
+ printf("%08lx ", (ulong)((size_t)row << 4));
for (c = 0; c < 8; c++) {
val16 = nvm_word((row << 3) + c, partnum);
if (c == 4)
printf(" ");
printf(" %02x %02x",
- (unsigned int)(val16 & 0xff),
- (unsigned int)(val16 >> 8));
+ (uint)(val16 & 0xff),
+ (uint)(val16 >> 8));
}
printf("\n");
}
@@ -1312,7 +1456,7 @@ cmd_helper_cat(void)
fflush(NULL);
for (p = 0; p < 2; p++) {
- gbe_cat_buf(buf + (p * GBE_PART_SIZE));
+ gbe_cat_buf(buf + (size_t)(p * GBE_PART_SIZE));
for (ff = 0; ff < n; ff++)
gbe_cat_buf(pad);
@@ -1323,7 +1467,8 @@ static void
gbe_cat_buf(u8 *b)
{
if (rw_file_exact(STDOUT_FILENO, b,
- GBE_PART_SIZE, 0, IO_WRITE, 1, 1) < 0)
+ GBE_PART_SIZE, 0, IO_WRITE, LOOP_EAGAIN, LOOP_EINTR,
+ MAX_ZERO_RW_RETRY, OFF_ERR) < 0)
err(errno, "stdout: cat");
}
@@ -1346,6 +1491,9 @@ write_gbe_file(void)
if (fstat(gbe_fd, &gbe_st) == -1)
err(errno, "%s: re-check", fname);
+ if (gbe_st.st_dev != gbe_dev || gbe_st.st_ino != gbe_ino)
+ err(EIO, "%s: file replaced while open", fname);
+
if (gbe_st.st_size != gbe_file_size)
err(errno, "%s: file size changed before write", fname);
@@ -1465,7 +1613,7 @@ check_nvm_bound(size_t c, size_t p)
if (c >= NVM_WORDS)
err(ECANCELED, "check_nvm_bound: out of bounds %lu",
- (unsigned long)c);
+ (ulong)c);
}
static void
@@ -1473,21 +1621,23 @@ check_bin(size_t a, const char *a_name)
{
if (a > 1)
err(EINVAL, "%s must be 0 or 1, but is %lu",
- a_name, (unsigned long)a);
+ a_name, (ulong)a);
}
static void
rw_gbe_file_part(size_t p, int rw_type,
const char *rw_type_str)
{
+ ssize_t r;
size_t gbe_rw_size = command[cmd_index].rw_size;
u8 invert = command[cmd_index].invert;
u8 *mem_offset;
+ off_t file_offset;
if (rw_type < IO_PREAD || rw_type > IO_PWRITE)
err(errno, "%s: %s: part %lu: invalid rw_type, %d",
- fname, rw_type_str, (unsigned long)p, rw_type);
+ fname, rw_type_str, (ulong)p, rw_type);
if (rw_type == IO_PWRITE)
invert = 0;
@@ -1497,12 +1647,123 @@ rw_gbe_file_part(size_t p, int rw_type,
* E.g. read from p0 (file) to p1 (mem).
*/
mem_offset = gbe_mem_offset(p ^ invert, rw_type_str);
+ file_offset = (off_t)gbe_file_offset(p, rw_type_str);
- if (rw_gbe_file_exact(gbe_fd, mem_offset,
- gbe_rw_size, gbe_file_offset(p, rw_type_str),
- rw_type) == -1)
+ r = rw_gbe_file_exact(gbe_fd, mem_offset,
+ gbe_rw_size, file_offset, rw_type);
+
+ if (r == -1)
err(errno, "%s: %s: part %lu",
- fname, rw_type_str, (unsigned long)p);
+ fname, rw_type_str, (ulong)p);
+
+ if ((size_t)r != gbe_rw_size)
+ err(EIO, "%s: partial %s: part %lu",
+ fname, rw_type_str, (ulong)p);
+}
+
+/*
+ * Post-write verification of one part: re-read
+ * what was just written to the file and compare
+ * it against the in-memory copy. Nothing is
+ * done for a part that was not modified.
+ *
+ * Outcomes are recorded in rw_check_err_read[],
+ * rw_check_partial_read[], rw_check_bad_part[]
+ * and post_rw_checksum[]; io_err_gbe is raised
+ * on any failure. Nothing is printed here.
+ */
+static void
+check_written_part(size_t p)
+{
+	struct stat post_st;
+	u8 *saved_buf;
+	u8 *written;
+	off_t disk_off;
+	size_t want;
+	ssize_t got;
+
+	if (part_modified[p] == 0)
+		return;
+
+	want = command[cmd_index].rw_size;
+
+	/* the write path never inverts parts */
+	written = gbe_mem_offset(p, "pwrite");
+	disk_off = (off_t)gbe_file_offset(p, "pwrite");
+
+	/* start from a known pattern before reading back */
+	memset(pad, 0xff, sizeof(pad));
+
+	if (fstat(gbe_fd, &post_st) == -1)
+		err(errno, "%s: fstat (post-write)", fname);
+
+	/* must still be the same file that was opened */
+	if (!(post_st.st_dev == gbe_dev && post_st.st_ino == gbe_ino))
+		err(EIO, "%s: file changed during write", fname);
+
+	got = rw_gbe_file_exact(gbe_fd, pad,
+	    want, disk_off, IO_PREAD);
+
+	if (got == -1) {
+		io_err_gbe = 1;
+		rw_check_err_read[p] = 1;
+	} else if ((size_t)got != want) {
+		io_err_gbe = 1;
+		rw_check_partial_read[p] = 1;
+	} else if (memcmp(written, pad, want) != 0) {
+		io_err_gbe = 1;
+		rw_check_bad_part[p] = 1;
+	}
+
+	/* no usable data in pad: skip the checksum */
+	if (rw_check_err_read[p] ||
+	    rw_check_partial_read[p])
+		return;
+
+	/*
+	 * The part that was read back sits at
+	 * offset zero of pad, so buf is pointed
+	 * at pad for the duration of the check
+	 * and part 0 is always the one checked.
+	 */
+	saved_buf = buf;
+	buf = pad;
+	post_rw_checksum[p] = good_checksum(0);
+	buf = saved_buf;
+}
+
+/*
+ * Print, on stderr, the results gathered by
+ * the post-write verification, for each part
+ * that was modified. Prints nothing unless
+ * io_err_gbe is set.
+ */
+static void
+report_io_err_rw(void)
+{
+	size_t partnum;
+	int read_failed;
+
+	if (io_err_gbe == 0)
+		return;
+
+	for (partnum = 0; partnum < 2; partnum++) {
+		if (part_modified[partnum] == 0)
+			continue;
+
+		read_failed = rw_check_err_read[partnum] ||
+		    rw_check_partial_read[partnum];
+
+		if (rw_check_err_read[partnum])
+			fprintf(stderr,
+			    "%s: pread: p%lu (post-verification)\n",
+			    fname, (ulong)partnum);
+		if (rw_check_partial_read[partnum])
+			fprintf(stderr,
+			    "%s: partial pread: p%lu (post-verification)\n",
+			    fname, (ulong)partnum);
+		if (rw_check_bad_part[partnum])
+			fprintf(stderr,
+			    "%s: pwrite: corrupt write on p%lu\n",
+			    fname, (ulong)partnum);
+
+		/* no data was read back, so no checksum result */
+		if (read_failed) {
+			fprintf(stderr,
+			    "%s: p%lu: skipped checksum verification "
+			    "(because read failed)\n",
+			    fname, (ulong)partnum);
+
+			continue;
+		}
+
+		fprintf(stderr, "%s: ", fname);
+
+		if (!post_rw_checksum[partnum])
+			fprintf(stderr, "BAD");
+		else
+			fprintf(stderr, "GOOD");
+
+		fprintf(stderr, " checksum in p%lu on-disk.\n",
+		    (ulong)partnum);
+
+		/* a good checksum alone does not prove a good write */
+		if (post_rw_checksum[partnum]) {
+			fprintf(stderr,
+			    " This does NOT mean it's safe. it may be\n"
+			    " salvageable if you use the cat feature.\n");
+		}
+	}
+}
/*
@@ -1516,7 +1777,7 @@ gbe_mem_offset(size_t p, const char *f_op)
off_t gbe_off = gbe_x_offset(p, f_op, "mem",
GBE_PART_SIZE, GBE_FILE_SIZE);
- return (u8 *)(buf + gbe_off);
+ return (u8 *)(buf + (size_t)gbe_off);
}
/*
@@ -1560,13 +1821,23 @@ static ssize_t
rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type)
{
- if (mem == NULL)
- goto err_rw_gbe_file_exact;
+ size_t mem_addr;
+ size_t buf_addr;
+ ssize_t r;
- if (mem != (void *)pad
- && mem != (void *)rnum
- && (mem < buf || mem >= (buf + GBE_FILE_SIZE)))
- goto err_rw_gbe_file_exact;
+ if (io_args(fd, mem, nrw, off, rw_type) == -1)
+ return -1;
+
+ mem_addr = (size_t)(void *)mem;
+ buf_addr = (size_t)(void *)buf;
+
+ if (mem != (void *)pad) {
+ if (mem_addr < buf_addr)
+ goto err_rw_gbe_file_exact;
+
+ if ((mem_addr - buf_addr) >= (size_t)GBE_FILE_SIZE)
+ goto err_rw_gbe_file_exact;
+ }
if (off < 0 || off >= gbe_file_size)
goto err_rw_gbe_file_exact;
@@ -1574,10 +1845,14 @@ rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
if (nrw > (size_t)(gbe_file_size - off))
goto err_rw_gbe_file_exact;
- if (nrw > GBE_PART_SIZE)
+ if (nrw > (size_t)GBE_PART_SIZE)
goto err_rw_gbe_file_exact;
- return rw_file_exact(fd, mem, nrw, off, rw_type, 0, 1);
+ r = rw_file_exact(fd, mem, nrw, off, rw_type,
+ NO_LOOP_EAGAIN, LOOP_EINTR, MAX_ZERO_RW_RETRY,
+ OFF_ERR);
+
+ return rw_over_nrw(r, nrw);
err_rw_gbe_file_exact:
errno = EIO;
@@ -1585,78 +1860,89 @@ err_rw_gbe_file_exact:
}
/*
- * Read or write the exact contents of a file,
- * along with a buffer, (if applicable) offset,
- * and number of bytes to be read. It unifies
- * the functionality of read(), pread(), write()
- * and pwrite(), with retry-on-EINTR and also
- * prevents infinite loop on zero-reads.
+ * Safe I/O functions wrapping around
+ * read(), write() and providing a portable
+ * analog of both pread() and pwrite().
+ * These functions are designed for maximum
+ * robustness, checking NULL inputs, overflowed
+ * outputs, and all kinds of errors that the
+ * standard libc functions don't.
+ *
+ * Looping on EINTR and EAGAIN is supported.
+ * EINTR/EAGAIN looping is done indefinitely.
+ */
+
+/*
+ * rw_file_exact() - Read perfectly or die
+ *
+ * Read/write, and absolutely insist on an
+ * absolute read; e.g. if 100 bytes are
+ * requested, this MUST return 100.
*
- * The pread() and pwrite() functionality are
- * provided by yet another portable function,
- * prw() - see notes below.
+ * This function will never return zero.
+ * It will only return below (error),
+ * or above (success). On error, -1 is
+ * returned and errno is set accordingly.
*
- * This must only be used on files. It cannot
- * be used on sockets or pipes, because 0-byte
- * reads are treated like fatal errors. This
- * means that EOF is also considered fatal.
+ * Zero-byte returns are not allowed.
+ * It will re-spin a finite number of
+ * times upon zero-return, to recover,
+ * otherwise it will return an error.
*/
static ssize_t
rw_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type, int loop_eagain,
- int loop_eintr)
+ int loop_eintr, size_t max_retries,
+ int off_reset)
{
- ssize_t rv;
- size_t rc;
+ ssize_t rv = 0;
+ ssize_t rc = 0;
+ size_t retries_on_zero = 0;
+ off_t off_cur;
+ size_t nrw_cur;
+ void *mem_cur;
- for (rc = 0, rv = 0; rc < nrw; ) {
- if ((rv = rw_file_once(fd, mem, nrw, off, rw_type, rc,
- loop_eagain, loop_eintr)) <= 0)
- return -1;
+ if (io_args(fd, mem, nrw, off, rw_type) == -1)
+ return -1;
- rc += (size_t)rv;
- }
+ while (1) {
+
+ /* Prevent theoretical overflow */
+ if (rv >= 0 && (size_t)rv > (nrw - rc))
+ goto err_rw_file_exact;
- return rc;
-}
+ rc += rv;
+ if ((size_t)rc >= nrw)
+ break;
-/*
- * Helper function for rw_file_exact, that
- * also does extra error handling pertaining
- * to GbE file offsets.
- *
- * May not return all requested bytes (nrw).
- * Use rw_file_exact for guaranteed length.
- */
-static ssize_t
-rw_file_once(int fd, u8 *mem, size_t nrw,
- off_t off, int rw_type, size_t rc,
- int loop_eagain, int loop_eintr)
-{
- ssize_t rv;
- size_t retries_on_zero = 0;
- size_t max_retries = 10;
+ mem_cur = (void *)(mem + (size_t)rc);
+ nrw_cur = (size_t)(nrw - (size_t)rc);
+ if (off < 0)
+ goto err_rw_file_exact;
+ off_cur = (off_t)((size_t)off + (size_t)rc);
- if (mem == NULL)
- goto err_rw_file_once;
+ rv = prw(fd, mem_cur, nrw_cur, off_cur,
+ rw_type, loop_eagain, loop_eintr,
+ off_reset);
-read_again:
- rv = prw(fd, mem + rc, nrw - rc, off + rc, rw_type,
- loop_eagain, loop_eintr);
+ if (rv < 0)
+ return -1;
- if (rv < 0)
- return -1;
+ if (rv == 0) {
+ if (retries_on_zero++ < max_retries)
+ continue;
+ goto err_rw_file_exact;
+ }
- if ((size_t)rv > (nrw - rc))/* don't overflow */
- goto err_rw_file_once;
+ retries_on_zero = 0;
+ }
- if (rv != 0)
- return rv;
+ if ((size_t)rc != nrw)
+ goto err_rw_file_exact;
- if (retries_on_zero++ < max_retries)
- goto read_again;
+ return rw_over_nrw(rc, nrw);
-err_rw_file_once:
+err_rw_file_exact:
errno = EIO;
return -1;
}
@@ -1672,6 +1958,9 @@ err_rw_file_once:
* This limitation is acceptable, since nvmutil is
* single-threaded. Portability is the main goal.
*
+ * If you need real pwrite/pread, just compile
+ * with flag: HAVE_REAL_PREAD_PWRITE=1
+ *
* A fallback is provided for regular read/write.
* rw_type can be IO_READ, IO_WRITE, IO_PREAD
* or IO_PWRITE
@@ -1684,31 +1973,58 @@ err_rw_file_once:
* also mitigates a few theoretical libc bugs.
* It is designed for extremely safe single-threaded
* I/O on applications that need it.
+ *
+ * NOTE: If you use loop_eagain (1), you enable wait
+ * loop on EAGAIN. Beware if using this on a non-blocking
+ * pipe (it could spin indefinitely).
+ *
+ * off_reset: only used by the fallback pread/pwrite
+ * analogs. If zero, a file offset that differs
+ * from the one just set (which would indicate
+ * that another thread changed it) makes prw()
+ * return an error without repositioning the
+ * file - that thread can keep running, and the
+ * caller may exit the whole program if it wants.
+ * If not zero:
+ * we set the offset again, once, and continue, hoping for the best.
*/
static ssize_t
prw(int fd, void *mem, size_t nrw,
off_t off, int rw_type,
- int loop_eagain, int loop_eintr)
+ int loop_eagain, int loop_eintr,
+ int off_reset)
{
- off_t off_orig;
ssize_t r;
- int saved_errno;
- int flags;
int positional_rw;
+ struct stat st;
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+ int saved_errno;
+ off_t verified;
+ off_t off_orig;
+ off_t off_last;
+#endif
- if (mem == NULL)
- goto err_prw;
-
- if (fd < 0
- || off < 0
- || !nrw /* prevent zero read request */
- || nrw > (size_t)SSIZE_MAX /* prevent overflow */
- || (unsigned int)rw_type > IO_PWRITE)
- goto err_prw;
+ if (io_args(fd, mem, nrw, off, rw_type) == -1)
+ return -1;
r = -1;
+ /* Programs like cat can use this,
+ so we only check if it's a normal
+ file if not looping EAGAIN */
+ if (!loop_eagain) {
+ /*
+ * Checking on every run of prw()
+ * is expensive if called many
+ * times, but is defensive in
+ * case the status changes.
+ */
+ if (check_file(fd, &st) == -1)
+ return -1;
+ }
+
if (rw_type >= IO_PREAD)
positional_rw = 1; /* pread/pwrite */
else
@@ -1717,10 +2033,21 @@ prw(int fd, void *mem, size_t nrw,
try_rw_again:
if (!positional_rw) {
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+real_pread_pwrite:
+#endif
if (rw_type == IO_WRITE)
r = write(fd, mem, nrw);
else if (rw_type == IO_READ)
r = read(fd, mem, nrw);
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ else if (rw_type == IO_PWRITE)
+ r = pwrite(fd, mem, nrw, off);
+ else if (rw_type == IO_PREAD)
+ r = pread(fd, mem, nrw, off);
+#endif
if (r == -1 && (errno == try_err(loop_eintr, EINTR)
|| errno == try_err(loop_eagain, EAGAIN)))
@@ -1729,60 +2056,165 @@ try_rw_again:
return rw_over_nrw(r, nrw);
}
- flags = fcntl(fd, F_GETFL);
- if (flags == -1)
- return -1;
-
- /*
- * O_APPEND must not be used, because this
- * allows POSIX write() to ignore the
- * current write offset and write at EOF,
- * which would therefore break pread/pwrite
- */
- if (flags & O_APPEND)
- goto err_prw;
-
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ goto real_pread_pwrite;
+#else
if ((off_orig = lseek_loop(fd, (off_t)0, SEEK_CUR,
- loop_eagain, loop_eintr)) == (off_t)-1)
+ loop_eagain, loop_eintr)) == (off_t)-1) {
r = -1;
- else if (lseek_loop(fd, off, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1)
+ } else if (lseek_loop(fd, off, SEEK_SET,
+ loop_eagain, loop_eintr) == (off_t)-1) {
r = -1;
+ } else {
+ verified = lseek_loop(fd, (off_t)0, SEEK_CUR,
+ loop_eagain, loop_eintr);
- do {
- if (rw_type == IO_PREAD)
- r = read(fd, mem, nrw);
- else if (rw_type == IO_PWRITE)
- r = write(fd, mem, nrw);
-
- r = rw_over_nrw(r, nrw);
- } while (r == -1 &&
- (errno == try_err(loop_eintr, EINTR)
- || errno == try_err(loop_eagain, EAGAIN)));
+ /*
+ * Partial thread-safety: detect
+ * if the offset changed to what
+ * we previously got. If it did,
+ * then another thread may have
+ * changed it. Enabled if
+ * off_reset is OFF_RESET.
+ *
+ * We do this *once*, on the theory
+ * that nothing is touching it now.
+ */
+ if (off_reset && off != verified)
+ lseek_loop(fd, off, SEEK_SET,
+ loop_eagain, loop_eintr);
+
+ do {
+ /*
+ * Verify again before I/O
+ * (even with OFF_ERR)
+ *
+			 * This implements the first check
+			 * even with OFF_ERR, but without
+			 * the recovery. With OFF_RESET, if
+			 * the check fails again, then we
+			 * know something else is touching
+			 * the file, so it is best that we
+			 * leave it alone and return an error.
+			 *
+			 * In other words, OFF_RESET only
+			 * tolerates one change. Any more
+			 * will cause an error, including
+			 * on each EINTR/EAGAIN re-spin.
+ */
+ verified = lseek_loop(fd, (off_t)0, SEEK_CUR,
+ loop_eagain, loop_eintr);
+
+ if (off != verified)
+ goto err_prw;
+
+ if (rw_type == IO_PREAD)
+ r = read(fd, mem, nrw);
+ else if (rw_type == IO_PWRITE)
+ r = write(fd, mem, nrw);
+
+			/*
+			 * A plain -1 is an ordinary failure:
+			 * leave errno untouched, so that the
+			 * EINTR/EAGAIN test in the loop
+			 * condition can retry. Only a bogus
+			 * return value flagged by rw_over_nrw()
+			 * is turned into EIO here.
+			 */
+			if (r != -1 && rw_over_nrw(r, nrw) == -1) {
+				errno = EIO;
+				break;
+			}
+
+ } while (r == -1 &&
+ (errno == try_err(loop_eintr, EINTR)
+ || errno == try_err(loop_eagain, EAGAIN)));
+ }
saved_errno = errno;
- if (lseek_loop(fd, off_orig, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1) {
- if (r < 0)
- errno = saved_errno;
- return -1;
+
+ off_last = lseek_loop(fd, off_orig, SEEK_SET,
+ loop_eagain, loop_eintr);
+
+ if (off_last != off_orig) {
+ errno = saved_errno;
+ goto err_prw;
}
+
errno = saved_errno;
return rw_over_nrw(r, nrw);
+#endif
err_prw:
errno = EIO;
return -1;
}
+/*
+ * io_args() - sanity-check the arguments
+ * common to the I/O helpers.
+ *
+ * fd:      file descriptor, must not be negative
+ * mem:     buffer, must not be NULL
+ * nrw:     byte count, must be 1..SSIZE_MAX
+ * off:     file offset, must not be negative,
+ *          and off + nrw must not wrap around
+ * rw_type: must be within IO_READ..IO_PWRITE
+ *
+ * Returns 0 if everything is acceptable.
+ * Otherwise returns -1 with errno set to EIO.
+ */
+static int
+io_args(int fd, void *mem, size_t nrw,
+    off_t off, int rw_type)
+{
+	/* obviously */
+	if (mem == NULL)
+		goto err_io_args;
+
+	/* uninitialised fd */
+	if (fd < 0)
+		goto err_io_args;
+
+	/* negative offset */
+	if (off < 0)
+		goto err_io_args;
+
+	/* prevent zero-byte rw */
+	if (!nrw)
+		goto err_io_args;
+
+	/* prevent overflow */
+	if (nrw > (size_t)SSIZE_MAX)
+		goto err_io_args;
+
+	/* prevent overflow */
+	if (((size_t)off + nrw) < (size_t)off)
+		goto err_io_args;
+
+	/*
+	 * Unknown I/O type. Negative values must
+	 * be rejected too: rw_type is a signed int,
+	 * and a negative type matches none of the
+	 * read/write branches, so no I/O would be
+	 * performed for it.
+	 */
+	if (rw_type < IO_READ || rw_type > IO_PWRITE)
+		goto err_io_args;
+
+	return 0;
+
+err_io_args:
+	errno = EIO;
+	return -1;
+}
+
+/*
+ * check_file() - confirm that fd refers
+ * to a regular file.
+ *
+ * st is filled in by fstat(). Returns 0
+ * for a regular file. Returns -1 with
+ * errno set to EIO if fstat() fails or
+ * the file is not a regular file.
+ */
+static int
+check_file(int fd, struct stat *st)
+{
+	if (fstat(fd, st) != -1 && S_ISREG(st->st_mode))
+		return 0;
+
+	errno = EIO;
+	return -1;
+}
+
/*
+ * Check overflows caused by buggy libc.
+ *
* POSIX can say whatever it wants.
* specification != implementation
*/
-static int
+static ssize_t
rw_over_nrw(ssize_t r, size_t nrw)
{
+ /*
+ * If a byte length of zero
+ * was requested, that is
+ * clearly a bug. No way.
+ */
+ if (!nrw)
+ goto err_rw_over_nrw;
+
if (r == -1)
return r;
@@ -1818,6 +2250,13 @@ err_rw_over_nrw:
return -1;
}
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+/*
+ * lseek_loop() does lseek() but optionally
+ * on an EINTR/EAGAIN wait loop. Used by prw()
+ * for setting offsets for positional I/O.
+ */
static off_t
lseek_loop(int fd, off_t off, int whence,
int loop_eagain, int loop_eintr)
@@ -1832,7 +2271,14 @@ lseek_loop(int fd, off_t off, int whence,
return old;
}
+#endif
+/*
+ * If a given error loop is enabled,
+ * e.g. for EINTR or EAGAIN, an I/O
+ * operation is retried for as long as it
+ * returns -1 with errno set to that error.
+ */
static int
try_err(int loop_err, int errval)
{
@@ -1850,12 +2296,10 @@ err(int nvm_errval, const char *msg, ...)
{
va_list args;
- if (nvm_errval >= 0) {
- close_files();
+ if (errno == 0)
errno = nvm_errval;
- }
- if (errno <= 0)
- errno = ECANCELED;
+
+ (void)close_files();
fprintf(stderr, "%s: ", getnvmprogname());
@@ -1869,20 +2313,25 @@ err(int nvm_errval, const char *msg, ...)
exit(EXIT_FAILURE);
}
-static void
+static int
close_files(void)
{
+ int close_err_gbe = 0;
+ int saved_errno = errno;
+
if (gbe_fd > -1) {
if (close(gbe_fd) == -1)
- err(-1, "%s: close failed", fname);
+ close_err_gbe = errno;
gbe_fd = -1;
}
- if (urandom_fd > -1) {
- if (close(urandom_fd) == -1)
- err(-1, "%s: close failed", rname);
- urandom_fd = -1;
- }
+ if (saved_errno)
+ errno = saved_errno;
+
+ if (close_err_gbe)
+ return -1;
+
+ return 0;
}
static const char *
@@ -1902,7 +2351,7 @@ getnvmprogname(void)
}
static void
-usage(u8 usage_exit)
+usage(int usage_exit)
{
const char *util = getnvmprogname();