diff options
Diffstat (limited to 'util')
| -rw-r--r-- | util/nvmutil/nvmutil.c | 675 |
1 files changed, 513 insertions, 162 deletions
diff --git a/util/nvmutil/nvmutil.c b/util/nvmutil/nvmutil.c index 6264537f..38ecc472 100644 --- a/util/nvmutil/nvmutil.c +++ b/util/nvmutil/nvmutil.c @@ -16,6 +16,67 @@ */ /* + * NOTE: older Linux lacked arc4random. + * added in glibc 2.36. Just pass HAVE_ARC4RANDOM_BUF=0 + * at build time if you need old Linux / other libc. + */ +#if defined(__OpenBSD__) || defined(__FreeBSD__) || \ + defined(__NetBSD__) || defined(__APPLE__) || \ + defined(__linux__) +#ifndef HAVE_ARC4RANDOM_BUF +#define HAVE_ARC4RANDOM_BUF 1 +#endif +#endif + +/* + * I/O config (build-time) + * + * Regarding: + * Retries on zero-return. + * + * 5 retries is generous, + * but also conservative. + * This is enough for e.g. + * slow USB flash drives, + * busy NFS servers, etc. + * Any more is too much + * and not of much benefit. + * + * 3-5 will tolerate buggy + * USB drives for example, + * but won't spin as long + * on really buggy and slow + * networks e.g. slow NFS. + * + * At least 3-5 recommended. + * Pass this at build time. + */ +#ifndef MAX_ZERO_RW_RETRY +#define MAX_ZERO_RW_RETRY 5 +#endif +/* + * 0: portable pread/pwrite + * 1: real pread/pwrite (thread-safe) + * Pass this at build-time + */ +#ifndef HAVE_REAL_PREAD_PWRITE +#define HAVE_REAL_PREAD_PWRITE 0 +#endif +/* + * Configure whether to wait on + * EINTR on files, or EAGAIN on + * cmd cat (stdout). + * + * Pass these at build time. + */ +#ifndef LOOP_EAGAIN +#define LOOP_EAGAIN 1 +#endif +#ifndef LOOP_EINTR +#define LOOP_EINTR 1 +#endif + +/* * Major TODO: split this into multiple files. * This program has become quite large now, mostly * due to all the extra sanity checks / portability. @@ -157,32 +218,45 @@ also consider: #include <fcntl.h> #include <limits.h> #include <stdarg.h> +#include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #include <unistd.h> -typedef unsigned char u8; +typedef unsigned char u8; typedef unsigned short ushort; -typedef unsigned int uint; +typedef unsigned int uint; +typedef unsigned long ulong; /* type asserts */ typedef char static_assert_char_is_8_bits[(CHAR_BIT == 8) ? 1 : -1]; typedef char static_assert_char_is_1[(sizeof(char) == 1) ? 1 : -1]; -typedef char static_assert_uint8_is_1[(sizeof(u8) == 1) ? 1 : -1]; -typedef char static_assert_uint16_is_2[(sizeof(ushort) >= 2) ? 1 : -1]; +typedef char static_assert_u8_is_1[ + (sizeof(u8) == 1) ? 1 : -1]; +typedef char static_assert_ushort_is_2[ + (sizeof(ushort) >= 2) ? 1 : -1]; typedef char static_assert_short_is_2[(sizeof(short) >= 2) ? 1 : -1]; -typedef char static_assert_uint32_is_4[(sizeof(uint) >= 4) ? 1 : -1]; +typedef char static_assert_uint_is_4[ + (sizeof(uint) >= 4) ? 1 : -1]; +typedef char static_assert_ulong_is_4[ + (sizeof(ulong) >= 4) ? 1 : -1]; typedef char static_assert_int_ge_32[(sizeof(int) >= 4) ? 1 : -1]; typedef char static_assert_twos_complement[ ((-1 & 3) == 3) ? 1 : -1 ]; +typedef char assert_ulong_ptr[ + (sizeof(ulong) >= sizeof(void *)) ? 1 : -1 +]; /* * We set _FILE_OFFSET_BITS 64, but we only handle * files that are 128KB in size at a maximum, so we * realistically only need 32-bit at a minimum. + * + * We set 64 anyway, because there's no reason not + * to, but some systems may ignore _FILE_OFFSET_BITS */ typedef char static_assert_off_t_is_32[(sizeof(off_t) >= 4) ? 1 : -1]; @@ -238,12 +312,7 @@ static int xstrxcmp(const char *a, const char *b, size_t maxlen); /* * Prep files for reading - * - * Portability: /dev/urandom used - * on Linux / old Unix, whereas - * arc4random is used on BSD/MacOS. */ -static void open_dev_urandom(void); static void open_gbe_file(void); static void lock_gbe_file(void); static void xopen(int *fd, const char *path, int flags, struct stat *st); @@ -277,8 +346,10 @@ static void set_mac_nib(size_t mac_str_pos, size_t mac_byte_pos, size_t mac_nib_pos); static ushort hextonum(char ch_s); static ushort rhex(void); -static ushort fallback_rand(void); -static unsigned long entropy_jitter(void); +#if !defined(HAVE_ARC4RANDOM_BUF) || \ + (HAVE_ARC4RANDOM_BUF) < 1 +static ulong entropy_jitter(void); +#endif static void write_mac_part(size_t partnum); /* @@ -323,6 +394,8 @@ static void check_bin(size_t a, const char *a_name); */ static void rw_gbe_file_part(size_t p, int rw_type, const char *rw_type_str); +static void check_written_part(size_t p); +static void report_io_err_rw(void); static u8 *gbe_mem_offset(size_t part, const char *f_op); static off_t gbe_file_offset(size_t part, const char *f_op); static off_t gbe_x_offset(size_t part, const char *f_op, @@ -330,12 +403,14 @@ static off_t gbe_x_offset(size_t part, const char *f_op, static ssize_t rw_gbe_file_exact(int fd, u8 *mem, size_t nrw, off_t off, int rw_type); static ssize_t rw_file_exact(int fd, u8 *mem, size_t len, - off_t off, int rw_type, int loop_eagain, int loop_eintr); + off_t off, int rw_type, int loop_eagain, int loop_eintr, + size_t max_retries); static ssize_t rw_file_once(int fd, u8 *mem, size_t len, off_t off, int rw_type, size_t rc, int loop_eagain, - int loop_eintr); + int loop_eintr, size_t max_retries); static ssize_t prw(int fd, void *mem, size_t nrw, off_t off, int rw_type, int loop_eagain, int loop_eintr); +static int check_file(int fd, struct stat *st); static int rw_over_nrw(ssize_t r, size_t nrw); static off_t lseek_loop(int fd, off_t off, int whence, int loop_eagain, int loop_eintr); @@ -344,10 +419,10 @@ static int try_err(int loop_err, int errval); /* * Error handling and cleanup */ +static int close_files(void); static void err(int nvm_errval, const char *msg, ...); -static void close_files(void); static const char *getnvmprogname(void); -static void usage(u8 usage_exit); +static void usage(int usage_exit); /* * Sizes in bytes: @@ -386,9 +461,6 @@ static u8 rnum[NUM_RANDOM_BYTES]; */ #define items(x) (sizeof((x)) / sizeof((x)[0])) -static const char newrandom[] = "/dev/urandom"; -static const char *rname = NULL; - /* * GbE files can be 8KB, 16KB or 128KB, * but we only need the two 4KB parts @@ -398,13 +470,13 @@ static const char *rname = NULL; * * The code will handle this properly. */ -static u8 buf[GBE_FILE_SIZE]; -static u8 pad[GBE_PART_SIZE]; /* the file that wouldn't die */ +static u8 real_buf[GBE_FILE_SIZE]; +static u8 pad[GBE_FILE_SIZE]; /* the file that wouldn't die */ +static u8 *buf = real_buf; static ushort mac_buf[3]; static off_t gbe_file_size; -static int urandom_fd = -1; static int gbe_fd = -1; static size_t part; static u8 part_modified[2]; @@ -432,6 +504,9 @@ static const char *argv0; #define ARGC_3 3 #define ARGC_4 4 +#define NO_LOOP_EAGAIN 0 +#define NO_LOOP_EINTR 0 + enum { IO_READ, IO_WRITE, @@ -591,8 +666,25 @@ typedef char bool_skip_checksum_write[(SKIP_CHECKSUM_WRITE==0)?1:-1]; typedef char bool_checksum_write[(CHECKSUM_WRITE==1)?1:-1]; typedef char bool_no_invert[(NO_INVERT==0)?1:-1]; typedef char bool_part_invert[(PART_INVERT==1)?1:-1]; +typedef char bool_loop_eintr[(LOOP_EINTR==1||LOOP_EINTR==0)?1:-1]; +typedef char bool_loop_eagain[(LOOP_EAGAIN==1||LOOP_EAGAIN==0)?1:-1]; +typedef char bool_no_loop_eintr[(NO_LOOP_EINTR==0)?1:-1]; +typedef char bool_no_loop_eagain[(NO_LOOP_EAGAIN==0)?1:-1]; + +static int io_err_gbe = 0; +static int rw_check_err_read[] = {0, 0}; +static int rw_check_partial_read[] = {0, 0}; +static int rw_check_bad_part[] = {0, 0}; -static int use_prng = 0; +static int post_rw_checksum[] = {0, 0}; + +static dev_t gbe_dev; +static ino_t gbe_ino; + +#if defined(HAVE_ARC4RANDOM_BUF) && \ + (HAVE_ARC4RANDOM_BUF) > 0 +void arc4random_buf(void *buf, size_t n); +#endif int main(int argc, char *argv[]) @@ -607,8 +699,8 @@ main(int argc, char *argv[]) #ifdef NVMUTIL_UNVEIL if (pledge("stdio rpath wpath unveil", NULL) == -1) err(errno, "pledge"); - if (unveil("/dev/urandom", "r") == -1) - err(errno, "unveil /dev/urandom"); + if (unveil("/dev/null", "r") == -1) + err(errno, "unveil /dev/null"); #else if (pledge("stdio rpath wpath", NULL) == -1) err(errno, "pledge"); @@ -645,7 +737,7 @@ main(int argc, char *argv[]) #endif #endif - open_dev_urandom(); + srand((uint)(time(NULL) ^ getpid())); open_gbe_file(); lock_gbe_file(); @@ -665,10 +757,29 @@ main(int argc, char *argv[]) run_cmd(cmd_index); - if (command[cmd_index].flags == O_RDWR) + if (command[cmd_index].flags == O_RDWR) { + write_gbe_file(); - close_files(); + /* + * We may otherwise read from + * cache, so we must sync. + */ + if (fsync(gbe_fd) == -1) + err(errno, "%s: fsync (pre-verification)", + fname); + + check_written_part(0); + check_written_part(1); + + report_io_err_rw(); + + if (io_err_gbe) + err(EIO, "%s: bad write", fname); + } + + if (close_files() == -1) + err(EIO, "%s: close", fname); return EXIT_SUCCESS; } @@ -698,19 +809,19 @@ sanitize_command_index(size_t c) if (command[c].argc < 3) err(EINVAL, "cmd index %lu: argc below 3, %d", - (unsigned long)c, command[c].argc); + (ulong)c, command[c].argc); if (command[c].str == NULL) err(EINVAL, "cmd index %lu: NULL str", - (unsigned long)c); + (ulong)c); if (*command[c].str == '\0') err(EINVAL, "cmd index %lu: empty str", - (unsigned long)c); + (ulong)c); if (xstrxlen(command[c].str, MAX_CMD_LEN + 1) > MAX_CMD_LEN) { err(EINVAL, "cmd index %lu: str too long: %s", - (unsigned long)c, command[c].str); + (ulong)c, command[c].str); } mod_type = command[c].set_modified; @@ -738,12 +849,12 @@ sanitize_command_index(size_t c) break; default: err(EINVAL, "Unsupported rw_size: %lu", - (unsigned long)gbe_rw_size); + (ulong)gbe_rw_size); } if (gbe_rw_size > GBE_PART_SIZE) err(EINVAL, "rw_size larger than GbE part: %lu", - (unsigned long)gbe_rw_size); + (ulong)gbe_rw_size); if (command[c].flags != O_RDONLY && command[c].flags != O_RDWR) @@ -796,13 +907,13 @@ set_cmd_args(int argc, char *argv[]) static size_t conv_argv_part_num(const char *part_str) { - unsigned char ch; + u8 ch; if (part_str[0] == '\0' || part_str[1] != '\0') err(EINVAL, "Partnum string '%s' wrong length", part_str); /* char signedness is implementation-defined */ - ch = (unsigned char)part_str[0]; + ch = (u8)part_str[0]; if (ch < '0' || ch > '1') err(EINVAL, "Bad part number (%c)", ch); @@ -826,7 +937,7 @@ xstrxcmp(const char *a, const char *b, size_t maxlen) for (i = 0; i < maxlen; i++) { if (a[i] != b[i]) - return (unsigned char)a[i] - (unsigned char)b[i]; + return (u8)a[i] - (u8)b[i]; if (a[i] == '\0') return 0; @@ -845,26 +956,39 @@ xstrxcmp(const char *a, const char *b, size_t maxlen) } static void -open_dev_urandom(void) -{ - rname = newrandom; - urandom_fd = open(rname, O_RDONLY); - if (urandom_fd != -1) - return; - - /* fallback on VERY VERY VERY old unix */ - use_prng = 1; - srand((unsigned)(time(NULL) ^ getpid())); -} - -static void open_gbe_file(void) { struct stat gbe_st; + int flags; xopen(&gbe_fd, fname, command[cmd_index].flags | O_BINARY | O_NOFOLLOW, &gbe_st); + /* inode will be checked later on write */ + gbe_dev = gbe_st.st_dev; + gbe_ino = gbe_st.st_ino; + + if (gbe_st.st_nlink > 1) + fprintf(stderr, + "%s: warning: file has %lu hard links\n", + fname, (ulong)gbe_st.st_nlink); + + if (gbe_st.st_nlink == 0) + err(EIO, "%s: file unlinked while open", fname); + + flags = fcntl(gbe_fd, F_GETFL); + if (flags == -1) + err(errno, "%s: fcntl(F_GETFL)", fname); + + /* + * O_APPEND must not be used, because this + * allows POSIX write() to ignore the + * current write offset and write at EOF, + * which would therefore break pread/pwrite + */ + if (flags & O_APPEND) + err(EIO, "%s: O_APPEND flag"); + gbe_file_size = gbe_st.st_size; switch (gbe_file_size) { @@ -906,6 +1030,9 @@ xopen(int *fd_ptr, const char *path, int flags, struct stat *st) if (!S_ISREG(st->st_mode)) err(errno, "%s: not a regular file", path); + + if (lseek(*fd_ptr, 0, SEEK_CUR) == (off_t)-1) + err(errno, "%s: file not seekable", path); } static void @@ -972,7 +1099,7 @@ read_checksums(void) if (num_invalid >= max_invalid) { if (max_invalid == 1) err(ECANCELED, "%s: part %lu has a bad checksum", - fname, (unsigned long)part); + fname, (ulong)part); err(ECANCELED, "%s: No valid checksum found in file", fname); } @@ -1003,7 +1130,7 @@ check_command_num(size_t c) { if (!valid_command(c)) err(EINVAL, "Invalid run_cmd arg: %lu", - (unsigned long)c); + (ulong)c); } static u8 @@ -1014,7 +1141,7 @@ valid_command(size_t c) if (c != command[c].chk) err(EINVAL, "Invalid cmd chk value (%lu) vs arg: %lu", - (unsigned long)command[c].chk, (unsigned long)c); + (ulong)command[c].chk, (ulong)c); return 1; } @@ -1128,14 +1255,14 @@ set_mac_nib(size_t mac_str_pos, static ushort hextonum(char ch_s) { - unsigned char ch = (unsigned char)ch_s; + u8 ch = (u8)ch_s; - if ((unsigned)(ch - '0') <= 9) + if ((uint)(ch - '0') <= 9) return ch - '0'; ch |= 0x20; - if ((unsigned)(ch - 'a') <= 5) + if ((uint)(ch - 'a') <= 5) return ch - 'a' + 10; if (ch == '?' || ch == 'x') @@ -1144,36 +1271,35 @@ hextonum(char ch_s) return 16; /* invalid character */ } +#if defined(HAVE_ARC4RANDOM_BUF) && \ + (HAVE_ARC4RANDOM_BUF) > 0 static ushort rhex(void) { + static u8 num[12]; static size_t n = 0; - if (use_prng) - return fallback_rand(); - if (!n) { - n = sizeof(rnum); - if (rw_file_exact(urandom_fd, rnum, n, 0, IO_READ, 0, 1) == -1) - err(errno, "Randomisation failed"); + n = 12; + arc4random_buf(num, 12); } - return (ushort)(rnum[--n] & 0xf); + return num[--n] & 0xf; } - +#else static ushort -fallback_rand(void) +rhex(void) { struct timeval tv; - unsigned long mix; - static unsigned long counter = 0; + ulong mix; + static ulong counter = 0; gettimeofday(&tv, NULL); - mix = (unsigned long)tv.tv_sec - ^ (unsigned long)tv.tv_usec - ^ (unsigned long)getpid() - ^ (unsigned long)&mix + mix = (ulong)tv.tv_sec + ^ (ulong)tv.tv_usec + ^ (ulong)getpid() + ^ (ulong)&mix ^ counter++ ^ entropy_jitter(); @@ -1181,18 +1307,18 @@ fallback_rand(void) * Stack addresses can vary between * calls, thus increasing entropy. */ - mix ^= (unsigned long)&mix; - mix ^= (unsigned long)&tv; - mix ^= (unsigned long)&counter; + mix ^= (ulong)&mix; + mix ^= (ulong)&tv; + mix ^= (ulong)&counter; return (ushort)(mix & 0xf); } -static unsigned long +static ulong entropy_jitter(void) { struct timeval a, b; - unsigned long mix = 0; + ulong mix = 0; long mix_diff; int i; @@ -1209,12 +1335,13 @@ entropy_jitter(void) if (mix_diff < 0) mix_diff = -mix_diff; - mix ^= (unsigned long)(mix_diff); - mix ^= (unsigned long)&mix; + mix ^= (ulong)(mix_diff); + mix ^= (ulong)&mix; } return mix; } +#endif static void write_mac_part(size_t partnum) @@ -1229,7 +1356,7 @@ write_mac_part(size_t partnum) set_nvm_word(w, partnum, mac_buf[w]); printf("Wrote MAC address to part %lu: ", - (unsigned long)partnum); + (ulong)partnum); print_mac_from_nvm(partnum); } @@ -1246,11 +1373,11 @@ cmd_helper_dump(void) fprintf(stderr, "BAD checksum %04x in part %lu (expected %04x)\n", nvm_word(NVM_CHECKSUM_WORD, partnum), - (unsigned long)partnum, + (ulong)partnum, calculated_checksum(partnum)); printf("MAC (part %lu): ", - (unsigned long)partnum); + (ulong)partnum); print_mac_from_nvm(partnum); hexdump(partnum); } @@ -1265,8 +1392,8 @@ print_mac_from_nvm(size_t partnum) for (c = 0; c < 3; c++) { val16 = nvm_word(c, partnum); printf("%02x:%02x", - (unsigned int)(val16 & 0xff), - (unsigned int)(val16 >> 8)); + (uint)(val16 & 0xff), + (uint)(val16 >> 8)); if (c == 2) printf("\n"); else @@ -1282,14 +1409,14 @@ hexdump(size_t partnum) ushort val16; for (row = 0; row < 8; row++) { - printf("%08lx ", (unsigned long)((size_t)row << 4)); + printf("%08lx ", (ulong)((size_t)row << 4)); for (c = 0; c < 8; c++) { val16 = nvm_word((row << 3) + c, partnum); if (c == 4) printf(" "); printf(" %02x %02x", - (unsigned int)(val16 & 0xff), - (unsigned int)(val16 >> 8)); + (uint)(val16 & 0xff), + (uint)(val16 >> 8)); } printf("\n"); } @@ -1323,7 +1450,8 @@ static void gbe_cat_buf(u8 *b) { if (rw_file_exact(STDOUT_FILENO, b, - GBE_PART_SIZE, 0, IO_WRITE, 1, 1) < 0) + GBE_PART_SIZE, 0, IO_WRITE, LOOP_EAGAIN, LOOP_EINTR, + MAX_ZERO_RW_RETRY) < 0) err(errno, "stdout: cat"); } @@ -1346,6 +1474,9 @@ write_gbe_file(void) if (fstat(gbe_fd, &gbe_st) == -1) err(errno, "%s: re-check", fname); + if (gbe_st.st_dev != gbe_dev || gbe_st.st_ino != gbe_ino) + err(EIO, "%s: file replaced while open", fname); + if (gbe_st.st_size != gbe_file_size) err(errno, "%s: file size changed before write", fname); @@ -1465,7 +1596,7 @@ check_nvm_bound(size_t c, size_t p) if (c >= NVM_WORDS) err(ECANCELED, "check_nvm_bound: out of bounds %lu", - (unsigned long)c); + (ulong)c); } static void @@ -1473,21 +1604,23 @@ check_bin(size_t a, const char *a_name) { if (a > 1) err(EINVAL, "%s must be 0 or 1, but is %lu", - a_name, (unsigned long)a); + a_name, (ulong)a); } static void rw_gbe_file_part(size_t p, int rw_type, const char *rw_type_str) { + ssize_t r; size_t gbe_rw_size = command[cmd_index].rw_size; u8 invert = command[cmd_index].invert; u8 *mem_offset; + off_t file_offset; if (rw_type < IO_PREAD || rw_type > IO_PWRITE) err(errno, "%s: %s: part %lu: invalid rw_type, %d", - fname, rw_type_str, (unsigned long)p, rw_type); + fname, rw_type_str, (ulong)p, rw_type); if (rw_type == IO_PWRITE) invert = 0; @@ -1497,12 +1630,123 @@ rw_gbe_file_part(size_t p, int rw_type, * E.g. read from p0 (file) to p1 (mem). */ mem_offset = gbe_mem_offset(p ^ invert, rw_type_str); + file_offset = (off_t)gbe_file_offset(p, rw_type_str); + + r = rw_gbe_file_exact(gbe_fd, mem_offset, + gbe_rw_size, file_offset, rw_type); - if (rw_gbe_file_exact(gbe_fd, mem_offset, - gbe_rw_size, gbe_file_offset(p, rw_type_str), - rw_type) == -1) + if (r == -1) err(errno, "%s: %s: part %lu", - fname, rw_type_str, (unsigned long)p); + fname, rw_type_str, (ulong)p); + + if ((size_t)r != gbe_rw_size) + err(EIO, "%s: partial %s: part %lu", + fname, rw_type_str, (ulong)p); +} + +static void +check_written_part(size_t p) +{ + ssize_t r; + size_t gbe_rw_size; + u8 *mem_offset; + off_t file_offset; + u8 *buf_restore; + struct stat st; + + if (!part_modified[p]) + return; + + gbe_rw_size = command[cmd_index].rw_size; + + /* invert not needed for pwrite */ + mem_offset = gbe_mem_offset(p, "pwrite"); + file_offset = (off_t)gbe_file_offset(p, "pwrite"); + + memset(pad, 0xff, sizeof(pad)); + + if (fstat(gbe_fd, &st) == -1) + err(errno, "%s: fstat (post-write)", fname); + + if (st.st_dev != gbe_dev || st.st_ino != gbe_ino) + err(EIO, "%s: file changed during write", fname); + + r = rw_gbe_file_exact(gbe_fd, pad, + gbe_rw_size, file_offset, IO_PREAD); + + if (r == -1) + rw_check_err_read[p] = io_err_gbe = 1; + else if ((size_t)r != gbe_rw_size) + rw_check_partial_read[p] = io_err_gbe = 1; + else if (memcmp(mem_offset, pad, gbe_rw_size) != 0) + rw_check_bad_part[p] = io_err_gbe = 1; + + if (rw_check_err_read[p] || + rw_check_partial_read[p]) + return; + + /* + * We only load one part on-file, into memory but + * always at offset zero, for post-write checks. + * That's why we hardcode good_checksum(0). + */ + buf_restore = buf; + buf = pad; + post_rw_checksum[p] = good_checksum(0); + buf = buf_restore; +} + +static void +report_io_err_rw(void) +{ + size_t p; + + if (!io_err_gbe) + return; + + for (p = 0; p < 2; p++) { + if (!part_modified[p]) + continue; + + if (rw_check_err_read[p]) + fprintf(stderr, + "%s: pread: p%lu (post-verification)\n", + fname, (ulong)p); + if (rw_check_partial_read[p]) + fprintf(stderr, + "%s: partial pread: p%lu (post-verification)\n", + fname, (ulong)p); + if (rw_check_bad_part[p]) + fprintf(stderr, + "%s: pwrite: corrupt write on p%lu\n", + fname, (ulong)p); + + if (rw_check_err_read[p] || + rw_check_partial_read[p]) { + fprintf(stderr, + "%s: p%lu: skipped checksum verification " + "(because read failed)\n", + fname, (ulong)p); + + continue; + } + + fprintf(stderr, "%s: ", fname); + + if (post_rw_checksum[p]) + fprintf(stderr, "GOOD"); + else + fprintf(stderr, "BAD"); + + fprintf(stderr, " checksum in p%lu on-disk.\n", + (ulong)p); + + if (post_rw_checksum[p]) { + fprintf(stderr, + " This does NOT mean it's safe. it may be\n" + " salvageable if you use the cat feature.\n"); + } + } } /* @@ -1560,13 +1804,21 @@ static ssize_t rw_gbe_file_exact(int fd, u8 *mem, size_t nrw, off_t off, int rw_type) { + ulong mem_addr; + ulong buf_addr; + ulong buf_end; + if (mem == NULL) goto err_rw_gbe_file_exact; - if (mem != (void *)pad - && mem != (void *)rnum - && (mem < buf || mem >= (buf + GBE_FILE_SIZE))) - goto err_rw_gbe_file_exact; + mem_addr = (ulong)(void *)mem; + buf_addr = (ulong)(void *)buf; + buf_end = buf_addr + (ulong)GBE_FILE_SIZE; + + if (mem != (void *)pad && + mem != (void *)rnum && + (mem_addr < buf_addr || mem_addr >= buf_end)) + goto err_rw_gbe_file_exact; if (off < 0 || off >= gbe_file_size) goto err_rw_gbe_file_exact; @@ -1577,7 +1829,8 @@ rw_gbe_file_exact(int fd, u8 *mem, size_t nrw, if (nrw > GBE_PART_SIZE) goto err_rw_gbe_file_exact; - return rw_file_exact(fd, mem, nrw, off, rw_type, 0, 1); + return rw_file_exact(fd, mem, nrw, off, rw_type, + NO_LOOP_EAGAIN, LOOP_EINTR, MAX_ZERO_RW_RETRY); err_rw_gbe_file_exact: errno = EIO; @@ -1585,57 +1838,95 @@ err_rw_gbe_file_exact: } /* - * Read or write the exact contents of a file, - * along with a buffer, (if applicable) offset, - * and number of bytes to be read. It unifies - * the functionality of read(), pread(), write() - * and pwrite(), with retry-on-EINTR and also - * prevents infinite loop on zero-reads. + * Safe I/O functions wrapping around + * read(), write() and providing a portable + * analog of both pread() and pwrite(). + * These functions are designed for maximum + * robustness, checking NULL inputs, overflowed + * outputs, and all kinds of errors that the + * standard libc functions don't. + * + * Looping on EINTR and EAGAIN is supported. + * EINTR/EAGAIN looping is done indefinitely. + */ + +/* + * rw_file_exact() - Read perfectly or die * - * The pread() and pwrite() functionality are - * provided by yet another portable function, - * prw() - see notes below. + * Read/write, and absolutely insist on an + * absolute read; e.g. if 100 bytes are + * requested, this MUST return 100. * - * This must only be used on files. It cannot - * be used on sockets or pipes, because 0-byte - * reads are treated like fatal errors. This - * means that EOF is also considered fatal. + * This function will never return zero. + * It will only return below (error), + * or above (success). On error, -1 is + * returned and errno is set accordingly. + * + * Zero-byte returns are not allowed. + * It calls rw_file_once(), which will + * re-try on zero-read a finite number + * of times, to prevent infinite loops + * while also having fault tolerance. */ static ssize_t rw_file_exact(int fd, u8 *mem, size_t nrw, off_t off, int rw_type, int loop_eagain, - int loop_eintr) + int loop_eintr, size_t max_retries) { ssize_t rv; size_t rc; for (rc = 0, rv = 0; rc < nrw; ) { if ((rv = rw_file_once(fd, mem, nrw, off, rw_type, rc, - loop_eagain, loop_eintr)) <= 0) + loop_eagain, loop_eintr, max_retries)) < 0) + return -1; + + /* rw_file_once never returns + zero, but it's still logically + incorrect not to handle it here */ + + if (rv == 0) { + errno = EIO; return -1; + } + + /* Prevent theoretical overflow */ + if ((size_t)rv > nrw - rc) + goto err_rw_file_exact; rc += (size_t)rv; } return rc; + +err_rw_file_exact: + errno = EIO; + return -1; } /* - * Helper function for rw_file_exact, that - * also does extra error handling pertaining - * to GbE file offsets. + * rw_file_once() - Read less than perfectly + * (and possibly die) + * + * Read/write, but don't insist on an + * absolute read; e.g. if 100 bytes are + * requested, this may return 80 <-- fine * - * May not return all requested bytes (nrw). - * Use rw_file_exact for guaranteed length. + * This function will never return zero. + * It will only return below (error), + * or above (success). On error, -1 is + * returned and errno is set accordingly. + * + * Zero-byte returns are not allowed. */ static ssize_t rw_file_once(int fd, u8 *mem, size_t nrw, off_t off, int rw_type, size_t rc, - int loop_eagain, int loop_eintr) + int loop_eagain, int loop_eintr, + size_t max_retries) { ssize_t rv; size_t retries_on_zero = 0; - size_t max_retries = 10; if (mem == NULL) goto err_rw_file_once; @@ -1672,6 +1963,9 @@ err_rw_file_once: * This limitation is acceptable, since nvmutil is * single-threaded. Portability is the main goal. * + * If you need real pwrite/pread, just compile + * with flag: HAVE_REAL_PREAD_PWRITE=1 + * * A fallback is provided for regular read/write. * rw_type can be IO_READ, IO_WRITE, IO_PREAD * or IO_PWRITE @@ -1692,10 +1986,11 @@ prw(int fd, void *mem, size_t nrw, int loop_eagain, int loop_eintr) { off_t off_orig; + off_t off_last; ssize_t r; int saved_errno; - int flags; int positional_rw; + struct stat st; if (mem == NULL) goto err_prw; @@ -1704,11 +1999,25 @@ prw(int fd, void *mem, size_t nrw, || off < 0 || !nrw /* prevent zero read request */ || nrw > (size_t)SSIZE_MAX /* prevent overflow */ - || (unsigned int)rw_type > IO_PWRITE) + || (uint)rw_type > IO_PWRITE) goto err_prw; r = -1; + /* Programs like cat can use this, + so we only check if it's a normal + file if not looping EAGAIN */ + if (!loop_eagain) { + /* + * Checking on every run of prw() + * is expensive if called many + * times, but is defensive in + * case the status changes. + */ + if (check_file(fd, &st) == -1) + return -1; + } + if (rw_type >= IO_PREAD) positional_rw = 1; /* pread/pwrite */ else @@ -1717,10 +2026,21 @@ prw(int fd, void *mem, size_t nrw, try_rw_again: if (!positional_rw) { +#if defined(HAVE_REAL_PREAD_PWRITE) && \ + HAVE_REAL_PREAD_PWRITE > 0 +real_pread_pwrite: +#endif if (rw_type == IO_WRITE) r = write(fd, mem, nrw); else if (rw_type == IO_READ) r = read(fd, mem, nrw); +#if defined(HAVE_REAL_PREAD_PWRITE) && \ + HAVE_REAL_PREAD_PWRITE > 0 + else if (rw_type == IO_PWRITE) + r = pwrite(fd, mem, nrw, off); + else if (rw_type == IO_PREAD) + r = pread(fd, mem, nrw, off); +#endif if (r == -1 && (errno == try_err(loop_eintr, EINTR) || errno == try_err(loop_eagain, EAGAIN))) @@ -1729,19 +2049,10 @@ try_rw_again: return rw_over_nrw(r, nrw); } - flags = fcntl(fd, F_GETFL); - if (flags == -1) - return -1; - - /* - * O_APPEND must not be used, because this - * allows POSIX write() to ignore the - * current write offset and write at EOF, - * which would therefore break pread/pwrite - */ - if (flags & O_APPEND) - goto err_prw; - +#if defined(HAVE_REAL_PREAD_PWRITE) && \ + HAVE_REAL_PREAD_PWRITE > 0 + goto real_pread_pwrite; +#else if ((off_orig = lseek_loop(fd, (off_t)0, SEEK_CUR, loop_eagain, loop_eintr)) == (off_t)-1) r = -1; @@ -1761,22 +2072,43 @@ try_rw_again: || errno == try_err(loop_eagain, EAGAIN))); saved_errno = errno; - if (lseek_loop(fd, off_orig, SEEK_SET, - loop_eagain, loop_eintr) == (off_t)-1) { - if (r < 0) - errno = saved_errno; + off_last = lseek_loop(fd, off_orig, SEEK_SET, + loop_eagain, loop_eintr); + if (off_last == (off_t)-1) { + errno = saved_errno; return -1; } + if (off_last != off_orig) + goto err_prw; errno = saved_errno; return rw_over_nrw(r, nrw); +#endif err_prw: errno = EIO; return -1; } +static int +check_file(int fd, struct stat *st) +{ + if (fstat(fd, st) == -1) + goto err_is_file; + + if (!S_ISREG(st->st_mode)) + goto err_is_file; + + return 0; + +err_is_file: + errno = EIO; + return -1; +} + /* + * Check overflows caused by buggy libc. + * * POSIX can say whatever it wants. * specification != implementation */ @@ -1818,6 +2150,13 @@ err_rw_over_nrw: return -1; } +#if !defined(HAVE_REAL_PREAD_PWRITE) || \ + HAVE_REAL_PREAD_PWRITE < 1 +/* + * lseek_loop() does lseek() but optionally + * on an EINTR/EAGAIN wait loop. Used by prw() + * for setting offsets for positional I/O. + */ static off_t lseek_loop(int fd, off_t off, int whence, int loop_eagain, int loop_eintr) @@ -1832,7 +2171,14 @@ lseek_loop(int fd, off_t off, int whence, return old; } +#endif +/* + * If a given error loop is enabled, + * e.g. EINTR or EAGAIN, an I/O operation + * will loop until errno isn't -1 and one + * of these, e.g. -1 and EINTR + */ static int try_err(int loop_err, int errval) { @@ -1845,17 +2191,38 @@ try_err(int loop_err, int errval) return -1; } +static int +close_files(void) +{ + int close_err_gbe = 0; + int saved_errno = errno; + + if (gbe_fd > -1) { + if (close(gbe_fd) == -1) + close_err_gbe = errno; + gbe_fd = -1; + } + + if (saved_errno) + errno = saved_errno; + + if (close_err_gbe) + return -1; + + return 0; +} + static void err(int nvm_errval, const char *msg, ...) { va_list args; - if (nvm_errval >= 0) { - close_files(); - errno = nvm_errval; - } - if (errno <= 0) + if (errno < 0) errno = ECANCELED; + if (!errno) + errno = nvm_errval; + + (void)close_files(); fprintf(stderr, "%s: ", getnvmprogname()); @@ -1869,22 +2236,6 @@ err(int nvm_errval, const char *msg, ...) exit(EXIT_FAILURE); } -static void -close_files(void) -{ - if (gbe_fd > -1) { - if (close(gbe_fd) == -1) - err(-1, "%s: close failed", fname); - gbe_fd = -1; - } - - if (urandom_fd > -1) { - if (close(urandom_fd) == -1) - err(-1, "%s: close failed", rname); - urandom_fd = -1; - } -} - static const char * getnvmprogname(void) { @@ -1902,7 +2253,7 @@ getnvmprogname(void) } static void -usage(u8 usage_exit) +usage(int usage_exit) { const char *util = getnvmprogname(); |
