summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r--util/nvmutil/nvmutil.c675
1 files changed, 513 insertions, 162 deletions
diff --git a/util/nvmutil/nvmutil.c b/util/nvmutil/nvmutil.c
index 6264537f..38ecc472 100644
--- a/util/nvmutil/nvmutil.c
+++ b/util/nvmutil/nvmutil.c
@@ -16,6 +16,67 @@
*/
/*
+ * NOTE: older Linux lacked arc4random; it was only
+ * added in glibc 2.36. Just pass HAVE_ARC4RANDOM_BUF=0
+ * at build time if you need old Linux / other libc.
+ */
+#if defined(__OpenBSD__) || defined(__FreeBSD__) || \
+ defined(__NetBSD__) || defined(__APPLE__) || \
+ defined(__linux__)
+#ifndef HAVE_ARC4RANDOM_BUF
+#define HAVE_ARC4RANDOM_BUF 1
+#endif
+#endif
+
+/*
+ * I/O config (build-time)
+ *
+ * Regarding:
+ * Retries on zero-return.
+ *
+ * 5 retries is generous,
+ * but also conservative.
+ * This is enough for e.g.
+ * slow USB flash drives,
+ * busy NFS servers, etc.
+ * Any more is too much
+ * and not of much benefit.
+ *
+ * 3-5 will tolerate buggy
+ * USB drives for example,
+ * but won't spin as long
+ * on really buggy and slow
+ * networks e.g. slow NFS.
+ *
+ * At least 3-5 recommended.
+ * Pass this at build time.
+ */
+#ifndef MAX_ZERO_RW_RETRY
+#define MAX_ZERO_RW_RETRY 5
+#endif
+/*
+ * 0: portable pread/pwrite
+ * 1: real pread/pwrite (thread-safe)
+ * Pass this at build-time
+ */
+#ifndef HAVE_REAL_PREAD_PWRITE
+#define HAVE_REAL_PREAD_PWRITE 0
+#endif
+/*
+ * Configure whether to wait on
+ * EINTR on files, or EAGAIN on
+ * cmd cat (stdout).
+ *
+ * Pass these at build time.
+ */
+#ifndef LOOP_EAGAIN
+#define LOOP_EAGAIN 1
+#endif
+#ifndef LOOP_EINTR
+#define LOOP_EINTR 1
+#endif
+
+/*
* Major TODO: split this into multiple files.
* This program has become quite large now, mostly
* due to all the extra sanity checks / portability.
@@ -157,32 +218,45 @@ also consider:
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
-typedef unsigned char u8;
+typedef unsigned char u8;
typedef unsigned short ushort;
-typedef unsigned int uint;
+typedef unsigned int uint;
+typedef unsigned long ulong;
/* type asserts */
typedef char static_assert_char_is_8_bits[(CHAR_BIT == 8) ? 1 : -1];
typedef char static_assert_char_is_1[(sizeof(char) == 1) ? 1 : -1];
-typedef char static_assert_uint8_is_1[(sizeof(u8) == 1) ? 1 : -1];
-typedef char static_assert_uint16_is_2[(sizeof(ushort) >= 2) ? 1 : -1];
+typedef char static_assert_u8_is_1[
+ (sizeof(u8) == 1) ? 1 : -1];
+typedef char static_assert_ushort_is_2[
+ (sizeof(ushort) >= 2) ? 1 : -1];
typedef char static_assert_short_is_2[(sizeof(short) >= 2) ? 1 : -1];
-typedef char static_assert_uint32_is_4[(sizeof(uint) >= 4) ? 1 : -1];
+typedef char static_assert_uint_is_4[
+ (sizeof(uint) >= 4) ? 1 : -1];
+typedef char static_assert_ulong_is_4[
+ (sizeof(ulong) >= 4) ? 1 : -1];
typedef char static_assert_int_ge_32[(sizeof(int) >= 4) ? 1 : -1];
typedef char static_assert_twos_complement[
((-1 & 3) == 3) ? 1 : -1
];
+typedef char assert_ulong_ptr[
+ (sizeof(ulong) >= sizeof(void *)) ? 1 : -1
+];
/*
* We set _FILE_OFFSET_BITS 64, but we only handle
* files that are 128KB in size at a maximum, so we
* realistically only need 32-bit at a minimum.
+ *
+ * We set 64 anyway, because there's no reason not
+ * to, but some systems may ignore _FILE_OFFSET_BITS
*/
typedef char static_assert_off_t_is_32[(sizeof(off_t) >= 4) ? 1 : -1];
@@ -238,12 +312,7 @@ static int xstrxcmp(const char *a, const char *b, size_t maxlen);
/*
* Prep files for reading
- *
- * Portability: /dev/urandom used
- * on Linux / old Unix, whereas
- * arc4random is used on BSD/MacOS.
*/
-static void open_dev_urandom(void);
static void open_gbe_file(void);
static void lock_gbe_file(void);
static void xopen(int *fd, const char *path, int flags, struct stat *st);
@@ -277,8 +346,10 @@ static void set_mac_nib(size_t mac_str_pos,
size_t mac_byte_pos, size_t mac_nib_pos);
static ushort hextonum(char ch_s);
static ushort rhex(void);
-static ushort fallback_rand(void);
-static unsigned long entropy_jitter(void);
+#if !defined(HAVE_ARC4RANDOM_BUF) || \
+ (HAVE_ARC4RANDOM_BUF) < 1
+static ulong entropy_jitter(void);
+#endif
static void write_mac_part(size_t partnum);
/*
@@ -323,6 +394,8 @@ static void check_bin(size_t a, const char *a_name);
*/
static void rw_gbe_file_part(size_t p, int rw_type,
const char *rw_type_str);
+static void check_written_part(size_t p);
+static void report_io_err_rw(void);
static u8 *gbe_mem_offset(size_t part, const char *f_op);
static off_t gbe_file_offset(size_t part, const char *f_op);
static off_t gbe_x_offset(size_t part, const char *f_op,
@@ -330,12 +403,14 @@ static off_t gbe_x_offset(size_t part, const char *f_op,
static ssize_t rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type);
static ssize_t rw_file_exact(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, int loop_eagain, int loop_eintr);
+ off_t off, int rw_type, int loop_eagain, int loop_eintr,
+ size_t max_retries);
static ssize_t rw_file_once(int fd, u8 *mem, size_t len,
off_t off, int rw_type, size_t rc, int loop_eagain,
- int loop_eintr);
+ int loop_eintr, size_t max_retries);
static ssize_t prw(int fd, void *mem, size_t nrw,
off_t off, int rw_type, int loop_eagain, int loop_eintr);
+static int check_file(int fd, struct stat *st);
static int rw_over_nrw(ssize_t r, size_t nrw);
static off_t lseek_loop(int fd, off_t off,
int whence, int loop_eagain, int loop_eintr);
@@ -344,10 +419,10 @@ static int try_err(int loop_err, int errval);
/*
* Error handling and cleanup
*/
+static int close_files(void);
static void err(int nvm_errval, const char *msg, ...);
-static void close_files(void);
static const char *getnvmprogname(void);
-static void usage(u8 usage_exit);
+static void usage(int usage_exit);
/*
* Sizes in bytes:
@@ -386,9 +461,6 @@ static u8 rnum[NUM_RANDOM_BYTES];
*/
#define items(x) (sizeof((x)) / sizeof((x)[0]))
-static const char newrandom[] = "/dev/urandom";
-static const char *rname = NULL;
-
/*
* GbE files can be 8KB, 16KB or 128KB,
* but we only need the two 4KB parts
@@ -398,13 +470,13 @@ static const char *rname = NULL;
*
* The code will handle this properly.
*/
-static u8 buf[GBE_FILE_SIZE];
-static u8 pad[GBE_PART_SIZE]; /* the file that wouldn't die */
+static u8 real_buf[GBE_FILE_SIZE];
+static u8 pad[GBE_FILE_SIZE]; /* the file that wouldn't die */
+static u8 *buf = real_buf;
static ushort mac_buf[3];
static off_t gbe_file_size;
-static int urandom_fd = -1;
static int gbe_fd = -1;
static size_t part;
static u8 part_modified[2];
@@ -432,6 +504,9 @@ static const char *argv0;
#define ARGC_3 3
#define ARGC_4 4
+#define NO_LOOP_EAGAIN 0
+#define NO_LOOP_EINTR 0
+
enum {
IO_READ,
IO_WRITE,
@@ -591,8 +666,25 @@ typedef char bool_skip_checksum_write[(SKIP_CHECKSUM_WRITE==0)?1:-1];
typedef char bool_checksum_write[(CHECKSUM_WRITE==1)?1:-1];
typedef char bool_no_invert[(NO_INVERT==0)?1:-1];
typedef char bool_part_invert[(PART_INVERT==1)?1:-1];
+typedef char bool_loop_eintr[(LOOP_EINTR==1||LOOP_EINTR==0)?1:-1];
+typedef char bool_loop_eagain[(LOOP_EAGAIN==1||LOOP_EAGAIN==0)?1:-1];
+typedef char bool_no_loop_eintr[(NO_LOOP_EINTR==0)?1:-1];
+typedef char bool_no_loop_eagain[(NO_LOOP_EAGAIN==0)?1:-1];
+
+static int io_err_gbe = 0;
+static int rw_check_err_read[] = {0, 0};
+static int rw_check_partial_read[] = {0, 0};
+static int rw_check_bad_part[] = {0, 0};
-static int use_prng = 0;
+static int post_rw_checksum[] = {0, 0};
+
+static dev_t gbe_dev;
+static ino_t gbe_ino;
+
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
+void arc4random_buf(void *buf, size_t n);
+#endif
int
main(int argc, char *argv[])
@@ -607,8 +699,8 @@ main(int argc, char *argv[])
#ifdef NVMUTIL_UNVEIL
if (pledge("stdio rpath wpath unveil", NULL) == -1)
err(errno, "pledge");
- if (unveil("/dev/urandom", "r") == -1)
- err(errno, "unveil /dev/urandom");
+ if (unveil("/dev/null", "r") == -1)
+ err(errno, "unveil /dev/null");
#else
if (pledge("stdio rpath wpath", NULL) == -1)
err(errno, "pledge");
@@ -645,7 +737,7 @@ main(int argc, char *argv[])
#endif
#endif
- open_dev_urandom();
+ srand((uint)(time(NULL) ^ getpid()));
open_gbe_file();
lock_gbe_file();
@@ -665,10 +757,29 @@ main(int argc, char *argv[])
run_cmd(cmd_index);
- if (command[cmd_index].flags == O_RDWR)
+ if (command[cmd_index].flags == O_RDWR) {
+
write_gbe_file();
- close_files();
+ /*
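+ * Flush the write to storage first, so
+ * that write-back errors are reported
+ * here, before verification. Note that
+ * fsync does not drop cached pages, so
+ * the read-back may still hit the cache.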
+ */
+ if (fsync(gbe_fd) == -1)
+ err(errno, "%s: fsync (pre-verification)",
+ fname);
+
+ check_written_part(0);
+ check_written_part(1);
+
+ report_io_err_rw();
+
+ if (io_err_gbe)
+ err(EIO, "%s: bad write", fname);
+ }
+
+ if (close_files() == -1)
+ err(EIO, "%s: close", fname);
return EXIT_SUCCESS;
}
@@ -698,19 +809,19 @@ sanitize_command_index(size_t c)
if (command[c].argc < 3)
err(EINVAL, "cmd index %lu: argc below 3, %d",
- (unsigned long)c, command[c].argc);
+ (ulong)c, command[c].argc);
if (command[c].str == NULL)
err(EINVAL, "cmd index %lu: NULL str",
- (unsigned long)c);
+ (ulong)c);
if (*command[c].str == '\0')
err(EINVAL, "cmd index %lu: empty str",
- (unsigned long)c);
+ (ulong)c);
if (xstrxlen(command[c].str, MAX_CMD_LEN + 1) >
MAX_CMD_LEN) {
err(EINVAL, "cmd index %lu: str too long: %s",
- (unsigned long)c, command[c].str);
+ (ulong)c, command[c].str);
}
mod_type = command[c].set_modified;
@@ -738,12 +849,12 @@ sanitize_command_index(size_t c)
break;
default:
err(EINVAL, "Unsupported rw_size: %lu",
- (unsigned long)gbe_rw_size);
+ (ulong)gbe_rw_size);
}
if (gbe_rw_size > GBE_PART_SIZE)
err(EINVAL, "rw_size larger than GbE part: %lu",
- (unsigned long)gbe_rw_size);
+ (ulong)gbe_rw_size);
if (command[c].flags != O_RDONLY &&
command[c].flags != O_RDWR)
@@ -796,13 +907,13 @@ set_cmd_args(int argc, char *argv[])
static size_t
conv_argv_part_num(const char *part_str)
{
- unsigned char ch;
+ u8 ch;
if (part_str[0] == '\0' || part_str[1] != '\0')
err(EINVAL, "Partnum string '%s' wrong length", part_str);
/* char signedness is implementation-defined */
- ch = (unsigned char)part_str[0];
+ ch = (u8)part_str[0];
if (ch < '0' || ch > '1')
err(EINVAL, "Bad part number (%c)", ch);
@@ -826,7 +937,7 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
for (i = 0; i < maxlen; i++) {
if (a[i] != b[i])
- return (unsigned char)a[i] - (unsigned char)b[i];
+ return (u8)a[i] - (u8)b[i];
if (a[i] == '\0')
return 0;
@@ -845,26 +956,39 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
}
static void
-open_dev_urandom(void)
-{
- rname = newrandom;
- urandom_fd = open(rname, O_RDONLY);
- if (urandom_fd != -1)
- return;
-
- /* fallback on VERY VERY VERY old unix */
- use_prng = 1;
- srand((unsigned)(time(NULL) ^ getpid()));
-}
-
-static void
open_gbe_file(void)
{
struct stat gbe_st;
+ int flags;
xopen(&gbe_fd, fname,
command[cmd_index].flags | O_BINARY | O_NOFOLLOW, &gbe_st);
+ /* inode will be checked later on write */
+ gbe_dev = gbe_st.st_dev;
+ gbe_ino = gbe_st.st_ino;
+
+ if (gbe_st.st_nlink > 1)
+ fprintf(stderr,
+ "%s: warning: file has %lu hard links\n",
+ fname, (ulong)gbe_st.st_nlink);
+
+ if (gbe_st.st_nlink == 0)
+ err(EIO, "%s: file unlinked while open", fname);
+
+ flags = fcntl(gbe_fd, F_GETFL);
+ if (flags == -1)
+ err(errno, "%s: fcntl(F_GETFL)", fname);
+
+ /*
+ * O_APPEND must not be used, because this
+ * allows POSIX write() to ignore the
+ * current write offset and write at EOF,
+ * which would therefore break pread/pwrite
+ *
+ * The file name is passed to err() so that
+ * the %s conversion has a matching argument.
+ */
+ if (flags & O_APPEND)
+ err(EIO, "%s: O_APPEND flag", fname);
+
gbe_file_size = gbe_st.st_size;
switch (gbe_file_size) {
@@ -906,6 +1030,9 @@ xopen(int *fd_ptr, const char *path, int flags, struct stat *st)
if (!S_ISREG(st->st_mode))
err(errno, "%s: not a regular file", path);
+
+ if (lseek(*fd_ptr, 0, SEEK_CUR) == (off_t)-1)
+ err(errno, "%s: file not seekable", path);
}
static void
@@ -972,7 +1099,7 @@ read_checksums(void)
if (num_invalid >= max_invalid) {
if (max_invalid == 1)
err(ECANCELED, "%s: part %lu has a bad checksum",
- fname, (unsigned long)part);
+ fname, (ulong)part);
err(ECANCELED, "%s: No valid checksum found in file",
fname);
}
@@ -1003,7 +1130,7 @@ check_command_num(size_t c)
{
if (!valid_command(c))
err(EINVAL, "Invalid run_cmd arg: %lu",
- (unsigned long)c);
+ (ulong)c);
}
static u8
@@ -1014,7 +1141,7 @@ valid_command(size_t c)
if (c != command[c].chk)
err(EINVAL, "Invalid cmd chk value (%lu) vs arg: %lu",
- (unsigned long)command[c].chk, (unsigned long)c);
+ (ulong)command[c].chk, (ulong)c);
return 1;
}
@@ -1128,14 +1255,14 @@ set_mac_nib(size_t mac_str_pos,
static ushort
hextonum(char ch_s)
{
- unsigned char ch = (unsigned char)ch_s;
+ u8 ch = (u8)ch_s;
- if ((unsigned)(ch - '0') <= 9)
+ if ((uint)(ch - '0') <= 9)
return ch - '0';
ch |= 0x20;
- if ((unsigned)(ch - 'a') <= 5)
+ if ((uint)(ch - 'a') <= 5)
return ch - 'a' + 10;
if (ch == '?' || ch == 'x')
@@ -1144,36 +1271,35 @@ hextonum(char ch_s)
return 16; /* invalid character */
}
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
static ushort
rhex(void)
{
+ static u8 num[12];
static size_t n = 0;
- if (use_prng)
- return fallback_rand();
-
if (!n) {
- n = sizeof(rnum);
- if (rw_file_exact(urandom_fd, rnum, n, 0, IO_READ, 0, 1) == -1)
- err(errno, "Randomisation failed");
+ n = 12;
+ arc4random_buf(num, 12);
}
- return (ushort)(rnum[--n] & 0xf);
+ return num[--n] & 0xf;
}
-
+#else
static ushort
-fallback_rand(void)
+rhex(void)
{
struct timeval tv;
- unsigned long mix;
- static unsigned long counter = 0;
+ ulong mix;
+ static ulong counter = 0;
gettimeofday(&tv, NULL);
- mix = (unsigned long)tv.tv_sec
- ^ (unsigned long)tv.tv_usec
- ^ (unsigned long)getpid()
- ^ (unsigned long)&mix
+ mix = (ulong)tv.tv_sec
+ ^ (ulong)tv.tv_usec
+ ^ (ulong)getpid()
+ ^ (ulong)&mix
^ counter++
^ entropy_jitter();
@@ -1181,18 +1307,18 @@ fallback_rand(void)
* Stack addresses can vary between
* calls, thus increasing entropy.
*/
- mix ^= (unsigned long)&mix;
- mix ^= (unsigned long)&tv;
- mix ^= (unsigned long)&counter;
+ mix ^= (ulong)&mix;
+ mix ^= (ulong)&tv;
+ mix ^= (ulong)&counter;
return (ushort)(mix & 0xf);
}
-static unsigned long
+static ulong
entropy_jitter(void)
{
struct timeval a, b;
- unsigned long mix = 0;
+ ulong mix = 0;
long mix_diff;
int i;
@@ -1209,12 +1335,13 @@ entropy_jitter(void)
if (mix_diff < 0)
mix_diff = -mix_diff;
- mix ^= (unsigned long)(mix_diff);
- mix ^= (unsigned long)&mix;
+ mix ^= (ulong)(mix_diff);
+ mix ^= (ulong)&mix;
}
return mix;
}
+#endif
static void
write_mac_part(size_t partnum)
@@ -1229,7 +1356,7 @@ write_mac_part(size_t partnum)
set_nvm_word(w, partnum, mac_buf[w]);
printf("Wrote MAC address to part %lu: ",
- (unsigned long)partnum);
+ (ulong)partnum);
print_mac_from_nvm(partnum);
}
@@ -1246,11 +1373,11 @@ cmd_helper_dump(void)
fprintf(stderr,
"BAD checksum %04x in part %lu (expected %04x)\n",
nvm_word(NVM_CHECKSUM_WORD, partnum),
- (unsigned long)partnum,
+ (ulong)partnum,
calculated_checksum(partnum));
printf("MAC (part %lu): ",
- (unsigned long)partnum);
+ (ulong)partnum);
print_mac_from_nvm(partnum);
hexdump(partnum);
}
@@ -1265,8 +1392,8 @@ print_mac_from_nvm(size_t partnum)
for (c = 0; c < 3; c++) {
val16 = nvm_word(c, partnum);
printf("%02x:%02x",
- (unsigned int)(val16 & 0xff),
- (unsigned int)(val16 >> 8));
+ (uint)(val16 & 0xff),
+ (uint)(val16 >> 8));
if (c == 2)
printf("\n");
else
@@ -1282,14 +1409,14 @@ hexdump(size_t partnum)
ushort val16;
for (row = 0; row < 8; row++) {
- printf("%08lx ", (unsigned long)((size_t)row << 4));
+ printf("%08lx ", (ulong)((size_t)row << 4));
for (c = 0; c < 8; c++) {
val16 = nvm_word((row << 3) + c, partnum);
if (c == 4)
printf(" ");
printf(" %02x %02x",
- (unsigned int)(val16 & 0xff),
- (unsigned int)(val16 >> 8));
+ (uint)(val16 & 0xff),
+ (uint)(val16 >> 8));
}
printf("\n");
}
@@ -1323,7 +1450,8 @@ static void
gbe_cat_buf(u8 *b)
{
if (rw_file_exact(STDOUT_FILENO, b,
- GBE_PART_SIZE, 0, IO_WRITE, 1, 1) < 0)
+ GBE_PART_SIZE, 0, IO_WRITE, LOOP_EAGAIN, LOOP_EINTR,
+ MAX_ZERO_RW_RETRY) < 0)
err(errno, "stdout: cat");
}
@@ -1346,6 +1474,9 @@ write_gbe_file(void)
if (fstat(gbe_fd, &gbe_st) == -1)
err(errno, "%s: re-check", fname);
+ if (gbe_st.st_dev != gbe_dev || gbe_st.st_ino != gbe_ino)
+ err(EIO, "%s: file replaced while open", fname);
+
if (gbe_st.st_size != gbe_file_size)
err(errno, "%s: file size changed before write", fname);
@@ -1465,7 +1596,7 @@ check_nvm_bound(size_t c, size_t p)
if (c >= NVM_WORDS)
err(ECANCELED, "check_nvm_bound: out of bounds %lu",
- (unsigned long)c);
+ (ulong)c);
}
static void
@@ -1473,21 +1604,23 @@ check_bin(size_t a, const char *a_name)
{
if (a > 1)
err(EINVAL, "%s must be 0 or 1, but is %lu",
- a_name, (unsigned long)a);
+ a_name, (ulong)a);
}
static void
rw_gbe_file_part(size_t p, int rw_type,
const char *rw_type_str)
{
+ ssize_t r;
size_t gbe_rw_size = command[cmd_index].rw_size;
u8 invert = command[cmd_index].invert;
u8 *mem_offset;
+ off_t file_offset;
if (rw_type < IO_PREAD || rw_type > IO_PWRITE)
err(errno, "%s: %s: part %lu: invalid rw_type, %d",
- fname, rw_type_str, (unsigned long)p, rw_type);
+ fname, rw_type_str, (ulong)p, rw_type);
if (rw_type == IO_PWRITE)
invert = 0;
@@ -1497,12 +1630,123 @@ rw_gbe_file_part(size_t p, int rw_type,
* E.g. read from p0 (file) to p1 (mem).
*/
mem_offset = gbe_mem_offset(p ^ invert, rw_type_str);
+ file_offset = (off_t)gbe_file_offset(p, rw_type_str);
+
+ r = rw_gbe_file_exact(gbe_fd, mem_offset,
+ gbe_rw_size, file_offset, rw_type);
- if (rw_gbe_file_exact(gbe_fd, mem_offset,
- gbe_rw_size, gbe_file_offset(p, rw_type_str),
- rw_type) == -1)
+ if (r == -1)
err(errno, "%s: %s: part %lu",
- fname, rw_type_str, (unsigned long)p);
+ fname, rw_type_str, (ulong)p);
+
+ if ((size_t)r != gbe_rw_size)
+ err(EIO, "%s: partial %s: part %lu",
+ fname, rw_type_str, (ulong)p);
+}
+
+/*
+ * check_written_part() - read part p back from the file
+ * after writing, and record whether it matches memory.
+ *
+ * Does nothing if part_modified[p] is not set.
+ *
+ * Results are not reported here; they are only recorded
+ * in rw_check_err_read[p], rw_check_partial_read[p],
+ * rw_check_bad_part[p] and post_rw_checksum[p], with
+ * io_err_gbe set on any failure.
+ *
+ * Calls err() if fstat fails, or if the descriptor no
+ * longer refers to the same device/inode that was
+ * recorded in gbe_dev/gbe_ino.
+ */
+static void
+check_written_part(size_t p)
+{
+ ssize_t r;
+ size_t gbe_rw_size;
+ u8 *mem_offset;
+ off_t file_offset;
+ u8 *buf_restore;
+ struct stat st;
+
+ if (!part_modified[p])
+ return;
+
+ gbe_rw_size = command[cmd_index].rw_size;
+
+ /* invert not needed for pwrite */
+ mem_offset = gbe_mem_offset(p, "pwrite");
+ file_offset = (off_t)gbe_file_offset(p, "pwrite");
+
+ /* pre-fill the scratch buffer before reading into it */
+ memset(pad, 0xff, sizeof(pad));
+
+ if (fstat(gbe_fd, &st) == -1)
+ err(errno, "%s: fstat (post-write)", fname);
+
+ if (st.st_dev != gbe_dev || st.st_ino != gbe_ino)
+ err(EIO, "%s: file changed during write", fname);
+
+ /* read the on-file part into pad (not into buf) */
+ r = rw_gbe_file_exact(gbe_fd, pad,
+ gbe_rw_size, file_offset, IO_PREAD);
+
+ /* exactly one of: read error, short read, data mismatch */
+ if (r == -1)
+ rw_check_err_read[p] = io_err_gbe = 1;
+ else if ((size_t)r != gbe_rw_size)
+ rw_check_partial_read[p] = io_err_gbe = 1;
+ else if (memcmp(mem_offset, pad, gbe_rw_size) != 0)
+ rw_check_bad_part[p] = io_err_gbe = 1;
+
+ /* no usable data in pad: skip the checksum test */
+ if (rw_check_err_read[p] ||
+ rw_check_partial_read[p])
+ return;
+
+ /*
+ * We only load one part on-file, into memory but
+ * always at offset zero, for post-write checks.
+ * That's why we hardcode good_checksum(0).
+ *
+ * buf is temporarily pointed at pad and restored
+ * afterwards; presumably good_checksum() reads
+ * through buf - confirm against its definition.
+ */
+ buf_restore = buf;
+ buf = pad;
+ post_rw_checksum[p] = good_checksum(0);
+ buf = buf_restore;
+}
+
+/*
+ * report_io_err_rw() - print post-write verification
+ * results to stderr.
+ *
+ * Prints nothing unless io_err_gbe is set. For each
+ * modified part, one line is printed per recorded
+ * failure flag (read error, partial read, corrupt
+ * write). If the read-back itself failed, a note that
+ * checksum verification was skipped is printed;
+ * otherwise the GOOD/BAD state stored in
+ * post_rw_checksum[p] is printed.
+ *
+ * This function only reports; it does not exit.
+ */
+static void
+report_io_err_rw(void)
+{
+ size_t p;
+
+ if (!io_err_gbe)
+ return;
+
+ for (p = 0; p < 2; p++) {
+ if (!part_modified[p])
+ continue;
+
+ if (rw_check_err_read[p])
+ fprintf(stderr,
+ "%s: pread: p%lu (post-verification)\n",
+ fname, (ulong)p);
+ if (rw_check_partial_read[p])
+ fprintf(stderr,
+ "%s: partial pread: p%lu (post-verification)\n",
+ fname, (ulong)p);
+ if (rw_check_bad_part[p])
+ fprintf(stderr,
+ "%s: pwrite: corrupt write on p%lu\n",
+ fname, (ulong)p);
+
+ /* no data was read back, so no checksum result exists */
+ if (rw_check_err_read[p] ||
+ rw_check_partial_read[p]) {
+ fprintf(stderr,
+ "%s: p%lu: skipped checksum verification "
+ "(because read failed)\n",
+ fname, (ulong)p);
+
+ continue;
+ }
+
+ fprintf(stderr, "%s: ", fname);
+
+ if (post_rw_checksum[p])
+ fprintf(stderr, "GOOD");
+ else
+ fprintf(stderr, "BAD");
+
+ fprintf(stderr, " checksum in p%lu on-disk.\n",
+ (ulong)p);
+
+ /* a good checksum alone does not prove a good write */
+ if (post_rw_checksum[p]) {
+ fprintf(stderr,
+ " This does NOT mean it's safe. it may be\n"
+ " salvageable if you use the cat feature.\n");
+ }
+ }
}
/*
@@ -1560,13 +1804,21 @@ static ssize_t
rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type)
{
+ ulong mem_addr;
+ ulong buf_addr;
+ ulong buf_end;
+
if (mem == NULL)
goto err_rw_gbe_file_exact;
- if (mem != (void *)pad
- && mem != (void *)rnum
- && (mem < buf || mem >= (buf + GBE_FILE_SIZE)))
- goto err_rw_gbe_file_exact;
+ mem_addr = (ulong)(void *)mem;
+ buf_addr = (ulong)(void *)buf;
+ buf_end = buf_addr + (ulong)GBE_FILE_SIZE;
+
+ if (mem != (void *)pad &&
+ mem != (void *)rnum &&
+ (mem_addr < buf_addr || mem_addr >= buf_end))
+ goto err_rw_gbe_file_exact;
if (off < 0 || off >= gbe_file_size)
goto err_rw_gbe_file_exact;
@@ -1577,7 +1829,8 @@ rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
if (nrw > GBE_PART_SIZE)
goto err_rw_gbe_file_exact;
- return rw_file_exact(fd, mem, nrw, off, rw_type, 0, 1);
+ return rw_file_exact(fd, mem, nrw, off, rw_type,
+ NO_LOOP_EAGAIN, LOOP_EINTR, MAX_ZERO_RW_RETRY);
err_rw_gbe_file_exact:
errno = EIO;
@@ -1585,57 +1838,95 @@ err_rw_gbe_file_exact:
}
/*
- * Read or write the exact contents of a file,
- * along with a buffer, (if applicable) offset,
- * and number of bytes to be read. It unifies
- * the functionality of read(), pread(), write()
- * and pwrite(), with retry-on-EINTR and also
- * prevents infinite loop on zero-reads.
+ * Safe I/O functions wrapping around
+ * read(), write() and providing a portable
+ * analog of both pread() and pwrite().
+ * These functions are designed for maximum
+ * robustness, checking NULL inputs, overflowed
+ * outputs, and all kinds of errors that the
+ * standard libc functions don't.
+ *
+ * Looping on EINTR and EAGAIN is supported.
+ * EINTR/EAGAIN looping is done indefinitely.
+ */
+
+/*
+ * rw_file_exact() - Read perfectly or die
*
- * The pread() and pwrite() functionality are
- * provided by yet another portable function,
- * prw() - see notes below.
+ * Read/write, and insist on a complete
+ * transfer; e.g. if 100 bytes are
+ * requested, this MUST return 100.
*
- * This must only be used on files. It cannot
- * be used on sockets or pipes, because 0-byte
- * reads are treated like fatal errors. This
- * means that EOF is also considered fatal.
+ * This function will never return zero.
+ * It returns either a negative value (error)
+ * or a positive byte count (success). On error,
+ * -1 is returned and errno is set accordingly.
+ *
+ * Zero-byte returns are not allowed.
+ * It calls rw_file_once(), which will
+ * re-try on zero-read a finite number
+ * of times, to prevent infinite loops
+ * while also having fault tolerance.
*/
static ssize_t
rw_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type, int loop_eagain,
- int loop_eintr)
+ int loop_eintr, size_t max_retries)
{
ssize_t rv;
size_t rc;
for (rc = 0, rv = 0; rc < nrw; ) {
if ((rv = rw_file_once(fd, mem, nrw, off, rw_type, rc,
- loop_eagain, loop_eintr)) <= 0)
+ loop_eagain, loop_eintr, max_retries)) < 0)
+ return -1;
+
+ /* rw_file_once never returns
+ zero, but it's still logically
+ incorrect not to handle it here */
+
+ if (rv == 0) {
+ errno = EIO;
return -1;
+ }
+
+ /* Prevent theoretical overflow */
+ if ((size_t)rv > nrw - rc)
+ goto err_rw_file_exact;
rc += (size_t)rv;
}
return rc;
+
+err_rw_file_exact:
+ errno = EIO;
+ return -1;
}
/*
- * Helper function for rw_file_exact, that
- * also does extra error handling pertaining
- * to GbE file offsets.
+ * rw_file_once() - Read less than perfectly
+ * (and possibly die)
+ *
+ * Read/write, but don't insist on an
+ * absolute read; e.g. if 100 bytes are
+ * requested, this may return 80 <-- fine
*
- * May not return all requested bytes (nrw).
- * Use rw_file_exact for guaranteed length.
+ * This function will never return zero.
+ * It returns either a negative value (error)
+ * or a positive byte count (success). On error,
+ * -1 is returned and errno is set accordingly.
+ *
+ * Zero-byte returns are not allowed.
*/
static ssize_t
rw_file_once(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type, size_t rc,
- int loop_eagain, int loop_eintr)
+ int loop_eagain, int loop_eintr,
+ size_t max_retries)
{
ssize_t rv;
size_t retries_on_zero = 0;
- size_t max_retries = 10;
if (mem == NULL)
goto err_rw_file_once;
@@ -1672,6 +1963,9 @@ err_rw_file_once:
* This limitation is acceptable, since nvmutil is
* single-threaded. Portability is the main goal.
*
+ * If you need real pwrite/pread, just compile
+ * with flag: HAVE_REAL_PREAD_PWRITE=1
+ *
* A fallback is provided for regular read/write.
* rw_type can be IO_READ, IO_WRITE, IO_PREAD
* or IO_PWRITE
@@ -1692,10 +1986,11 @@ prw(int fd, void *mem, size_t nrw,
int loop_eagain, int loop_eintr)
{
off_t off_orig;
+ off_t off_last;
ssize_t r;
int saved_errno;
- int flags;
int positional_rw;
+ struct stat st;
if (mem == NULL)
goto err_prw;
@@ -1704,11 +1999,25 @@ prw(int fd, void *mem, size_t nrw,
|| off < 0
|| !nrw /* prevent zero read request */
|| nrw > (size_t)SSIZE_MAX /* prevent overflow */
- || (unsigned int)rw_type > IO_PWRITE)
+ || (uint)rw_type > IO_PWRITE)
goto err_prw;
r = -1;
+ /* Programs like cat can use this,
+ so we only check if it's a normal
+ file if not looping EAGAIN */
+ if (!loop_eagain) {
+ /*
+ * Checking on every run of prw()
+ * is expensive if called many
+ * times, but is defensive in
+ * case the status changes.
+ */
+ if (check_file(fd, &st) == -1)
+ return -1;
+ }
+
if (rw_type >= IO_PREAD)
positional_rw = 1; /* pread/pwrite */
else
@@ -1717,10 +2026,21 @@ prw(int fd, void *mem, size_t nrw,
try_rw_again:
if (!positional_rw) {
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+real_pread_pwrite:
+#endif
if (rw_type == IO_WRITE)
r = write(fd, mem, nrw);
else if (rw_type == IO_READ)
r = read(fd, mem, nrw);
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ else if (rw_type == IO_PWRITE)
+ r = pwrite(fd, mem, nrw, off);
+ else if (rw_type == IO_PREAD)
+ r = pread(fd, mem, nrw, off);
+#endif
if (r == -1 && (errno == try_err(loop_eintr, EINTR)
|| errno == try_err(loop_eagain, EAGAIN)))
@@ -1729,19 +2049,10 @@ try_rw_again:
return rw_over_nrw(r, nrw);
}
- flags = fcntl(fd, F_GETFL);
- if (flags == -1)
- return -1;
-
- /*
- * O_APPEND must not be used, because this
- * allows POSIX write() to ignore the
- * current write offset and write at EOF,
- * which would therefore break pread/pwrite
- */
- if (flags & O_APPEND)
- goto err_prw;
-
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ goto real_pread_pwrite;
+#else
if ((off_orig = lseek_loop(fd, (off_t)0, SEEK_CUR,
loop_eagain, loop_eintr)) == (off_t)-1)
r = -1;
@@ -1761,22 +2072,43 @@ try_rw_again:
|| errno == try_err(loop_eagain, EAGAIN)));
saved_errno = errno;
- if (lseek_loop(fd, off_orig, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1) {
- if (r < 0)
- errno = saved_errno;
+ off_last = lseek_loop(fd, off_orig, SEEK_SET,
+ loop_eagain, loop_eintr);
+ if (off_last == (off_t)-1) {
+ errno = saved_errno;
return -1;
}
+ if (off_last != off_orig)
+ goto err_prw;
errno = saved_errno;
return rw_over_nrw(r, nrw);
+#endif
err_prw:
errno = EIO;
return -1;
}
+/*
+ * check_file() - verify that fd refers to a regular file.
+ *
+ * fd: open file descriptor to check
+ * st: caller-provided buffer, filled in by fstat()
+ *
+ * Returns 0 if fstat() succeeds and the file is a
+ * regular file. Otherwise returns -1 with errno set
+ * to EIO; note that this replaces whatever errno
+ * fstat() itself may have set.
+ */
+static int
+check_file(int fd, struct stat *st)
+{
+ if (fstat(fd, st) == -1)
+ goto err_is_file;
+
+ if (!S_ISREG(st->st_mode))
+ goto err_is_file;
+
+ return 0;
+
+err_is_file:
+ errno = EIO;
+ return -1;
+}
+
/*
+ * Check overflows caused by buggy libc.
+ *
* POSIX can say whatever it wants.
* specification != implementation
*/
@@ -1818,6 +2150,13 @@ err_rw_over_nrw:
return -1;
}
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+/*
+ * lseek_loop() does lseek() but optionally
+ * on an EINTR/EAGAIN wait loop. Used by prw()
+ * for setting offsets for positional I/O.
+ */
static off_t
lseek_loop(int fd, off_t off, int whence,
int loop_eagain, int loop_eintr)
@@ -1832,7 +2171,14 @@ lseek_loop(int fd, off_t off, int whence,
return old;
}
+#endif
+/*
+ * If a given error loop is enabled,
+ * e.g. EINTR or EAGAIN, an I/O operation
+ * is retried for as long as it returns -1
+ * with errno set to that error, e.g. EINTR
+ */
static int
try_err(int loop_err, int errval)
{
@@ -1845,17 +2191,38 @@ try_err(int loop_err, int errval)
return -1;
}
+/*
+ * close_files() - close the GbE file descriptor, if open.
+ *
+ * If errno was non-zero on entry, it is restored
+ * afterwards, so that an earlier error is not masked
+ * by close(). If errno was zero on entry and close()
+ * fails, errno is left as set by close().
+ *
+ * Returns 0 on success (or if nothing was open),
+ * and -1 if close() failed.
+ */
+static int
+close_files(void)
+{
+ int close_err_gbe = 0;
+ int saved_errno = errno;
+
+ if (gbe_fd > -1) {
+ if (close(gbe_fd) == -1)
+ close_err_gbe = errno;
+ /* reset regardless of the result of close() */
+ gbe_fd = -1;
+ }
+
+ if (saved_errno)
+ errno = saved_errno;
+
+ if (close_err_gbe)
+ return -1;
+
+ return 0;
+}
+
static void
err(int nvm_errval, const char *msg, ...)
{
va_list args;
- if (nvm_errval >= 0) {
- close_files();
- errno = nvm_errval;
- }
- if (errno <= 0)
+ if (errno < 0)
errno = ECANCELED;
+ if (!errno)
+ errno = nvm_errval;
+
+ (void)close_files();
fprintf(stderr, "%s: ", getnvmprogname());
@@ -1869,22 +2236,6 @@ err(int nvm_errval, const char *msg, ...)
exit(EXIT_FAILURE);
}
-static void
-close_files(void)
-{
- if (gbe_fd > -1) {
- if (close(gbe_fd) == -1)
- err(-1, "%s: close failed", fname);
- gbe_fd = -1;
- }
-
- if (urandom_fd > -1) {
- if (close(urandom_fd) == -1)
- err(-1, "%s: close failed", rname);
- urandom_fd = -1;
- }
-}
-
static const char *
getnvmprogname(void)
{
@@ -1902,7 +2253,7 @@ getnvmprogname(void)
}
static void
-usage(u8 usage_exit)
+usage(int usage_exit)
{
const char *util = getnvmprogname();