summaryrefslogtreecommitdiff
path: root/util/nvmutil
diff options
context:
space:
mode:
Diffstat (limited to 'util/nvmutil')
-rw-r--r--util/nvmutil/nvmutil.c692
1 files changed, 498 insertions, 194 deletions
diff --git a/util/nvmutil/nvmutil.c b/util/nvmutil/nvmutil.c
index e9046dc6..fe8364f7 100644
--- a/util/nvmutil/nvmutil.c
+++ b/util/nvmutil/nvmutil.c
@@ -15,6 +15,72 @@
* -Os -Wall -Wextra -Werror -pedantic -std=c90
*/
+#define OFF_ERR 0
+#ifndef OFF_RESET
+#define OFF_RESET 1
+#endif
+
+/*
+ * NOTE: older Linux lacked arc4random_buf; it was
+ * added in glibc 2.36. Pass HAVE_ARC4RANDOM_BUF=0 at
+ * build time if you target old Linux or another libc.
+ */
+#if defined(__OpenBSD__) || defined(__FreeBSD__) || \
+ defined(__NetBSD__) || defined(__APPLE__) || \
+ defined(__linux__)
+#ifndef HAVE_ARC4RANDOM_BUF
+#define HAVE_ARC4RANDOM_BUF 1
+#endif
+#endif
+
+/*
+ * I/O config (build-time)
+ *
+ * Regarding:
+ * Retries on zero-return.
+ *
+ * 5 retries is generous,
+ * but also conservative.
+ * This is enough for e.g.
+ * slow USB flash drives,
+ * busy NFS servers, etc.
+ * Any more is too much
+ * and not of much benefit.
+ *
+ * 3-5 will tolerate buggy
+ * USB drives for example,
+ * but won't spin as long
+ * on really buggy and slow
+ * networks e.g. slow NFS.
+ *
+ * At least 3-5 recommended.
+ * Pass this at build time.
+ */
+#ifndef MAX_ZERO_RW_RETRY
+#define MAX_ZERO_RW_RETRY 5
+#endif
+/*
+ * 0: portable pread/pwrite
+ * 1: real pread/pwrite (thread-safe)
+ * Pass this at build-time
+ */
+#ifndef HAVE_REAL_PREAD_PWRITE
+#define HAVE_REAL_PREAD_PWRITE 0
+#endif
+/*
+ * Configure whether to wait on
+ * EINTR on files, or EAGAIN on
+ * cmd cat (stdout).
+ *
+ * Pass these at build time.
+ */
+#ifndef LOOP_EAGAIN
+#define LOOP_EAGAIN 1
+#endif
+#ifndef LOOP_EINTR
+#define LOOP_EINTR 1
+#endif
+
/*
* Major TODO: split this into multiple files.
* This program has become quite large now, mostly
@@ -157,6 +223,7 @@ also consider:
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -184,6 +251,12 @@ typedef char static_assert_int_ge_32[(sizeof(int) >= 4) ? 1 : -1];
typedef char static_assert_twos_complement[
((-1 & 3) == 3) ? 1 : -1
];
+typedef char assert_ulong_ptr[
+ (sizeof(ulong) >= sizeof(void *)) ? 1 : -1
+];
+typedef char assert_size_t_ptr[
+ (sizeof(size_t) >= sizeof(void *)) ? 1 : -1
+];
/*
* We set _FILE_OFFSET_BITS 64, but we only handle
@@ -247,12 +320,7 @@ static int xstrxcmp(const char *a, const char *b, size_t maxlen);
/*
* Prep files for reading
- *
- * Portability: /dev/urandom used
- * on Linux / old Unix, whereas
- * arc4random is used on BSD/MacOS.
*/
-static void open_dev_urandom(void);
static void open_gbe_file(void);
static void lock_gbe_file(void);
static void xopen(int *fd, const char *path, int flags, struct stat *st);
@@ -286,8 +354,10 @@ static void set_mac_nib(size_t mac_str_pos,
size_t mac_byte_pos, size_t mac_nib_pos);
static ushort hextonum(char ch_s);
static ushort rhex(void);
-static ushort fallback_rand(void);
+#if !defined(HAVE_ARC4RANDOM_BUF) || \
+ (HAVE_ARC4RANDOM_BUF) < 1
static ulong entropy_jitter(void);
+#endif
static void write_mac_part(size_t partnum);
/*
@@ -341,22 +411,27 @@ static off_t gbe_x_offset(size_t part, const char *f_op,
static ssize_t rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type);
static ssize_t rw_file_exact(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, int loop_eagain, int loop_eintr);
-static ssize_t rw_file_once(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, size_t rc, int loop_eagain,
- int loop_eintr);
+ off_t off, int rw_type, int loop_eagain, int loop_eintr,
+ size_t max_retries, int off_reset);
static ssize_t prw(int fd, void *mem, size_t nrw,
- off_t off, int rw_type, int loop_eagain, int loop_eintr);
-static int rw_over_nrw(ssize_t r, size_t nrw);
+ off_t off, int rw_type, int loop_eagain, int loop_eintr,
+ int off_reset);
+static int io_args(int fd, void *mem, size_t nrw,
+ off_t off, int rw_type);
+static int check_file(int fd, struct stat *st);
+static ssize_t rw_over_nrw(ssize_t r, size_t nrw);
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
static off_t lseek_loop(int fd, off_t off,
int whence, int loop_eagain, int loop_eintr);
+#endif
static int try_err(int loop_err, int errval);
/*
* Error handling and cleanup
*/
static void err(int nvm_errval, const char *msg, ...);
-static void close_files(void);
+static int close_files(void);
static const char *getnvmprogname(void);
static void usage(int usage_exit);
@@ -388,18 +463,12 @@ static void usage(int usage_exit);
#define NVM_WORDS (NVM_SIZE >> 1)
#define NVM_CHECKSUM_WORD (NVM_WORDS - 1)
-#define NUM_RANDOM_BYTES 12
-static u8 rnum[NUM_RANDOM_BYTES];
-
/*
* Portable macro based on BSD nitems.
* Used to count the number of commands (see below).
*/
#define items(x) (sizeof((x)) / sizeof((x)[0]))
-static const char newrandom[] = "/dev/urandom";
-static const char *rname = NULL;
-
/*
* GbE files can be 8KB, 16KB or 128KB,
* but we only need the two 4KB parts
@@ -416,7 +485,6 @@ static u8 *buf = real_buf;
static ushort mac_buf[3];
static off_t gbe_file_size;
-static int urandom_fd = -1;
static int gbe_fd = -1;
static size_t part;
static u8 part_modified[2];
@@ -444,6 +512,9 @@ static const char *argv0;
#define ARGC_3 3
#define ARGC_4 4
+#define NO_LOOP_EAGAIN 0
+#define NO_LOOP_EINTR 0
+
enum {
IO_READ,
IO_WRITE,
@@ -578,8 +649,6 @@ typedef char assert_read[(IO_READ==0)?1:-1];
typedef char assert_write[(IO_WRITE==1)?1:-1];
typedef char assert_pread[(IO_PREAD==2)?1:-1];
typedef char assert_pwrite[(IO_PWRITE==3)?1:-1];
-typedef char assert_rand_byte[(NUM_RANDOM_BYTES>0)?1:-1];
-typedef char assert_rand_len[(NUM_RANDOM_BYTES<NVM_SIZE)?1:-1];
/* commands */
typedef char assert_cmd_dump[(CMD_DUMP==0)?1:-1];
typedef char assert_cmd_setmac[(CMD_SETMAC==1)?1:-1];
@@ -603,8 +672,12 @@ typedef char bool_skip_checksum_write[(SKIP_CHECKSUM_WRITE==0)?1:-1];
typedef char bool_checksum_write[(CHECKSUM_WRITE==1)?1:-1];
typedef char bool_no_invert[(NO_INVERT==0)?1:-1];
typedef char bool_part_invert[(PART_INVERT==1)?1:-1];
-
-static int use_prng = 0;
+typedef char bool_loop_eintr[(LOOP_EINTR==1||LOOP_EINTR==0)?1:-1];
+typedef char bool_loop_eagain[(LOOP_EAGAIN==1||LOOP_EAGAIN==0)?1:-1];
+typedef char bool_no_loop_eintr[(NO_LOOP_EINTR==0)?1:-1];
+typedef char bool_no_loop_eagain[(NO_LOOP_EAGAIN==0)?1:-1];
+typedef char bool_off_err[(OFF_ERR==0)?1:-1];
+typedef char bool_off_reset[(OFF_RESET==0||OFF_RESET==1)?1:-1];
static int io_err_gbe = 0;
static int rw_check_err_read[] = {0, 0};
@@ -613,6 +686,14 @@ static int rw_check_bad_part[] = {0, 0};
static int post_rw_checksum[] = {0, 0};
+static dev_t gbe_dev;
+static ino_t gbe_ino;
+
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
+void arc4random_buf(void *buf, size_t n);
+#endif
+
int
main(int argc, char *argv[])
{
@@ -624,12 +705,12 @@ main(int argc, char *argv[])
#ifdef NVMUTIL_PLEDGE
#ifdef NVMUTIL_UNVEIL
- if (pledge("stdio rpath wpath unveil", NULL) == -1)
+ if (pledge("stdio flock rpath wpath unveil", NULL) == -1)
err(errno, "pledge");
- if (unveil("/dev/urandom", "r") == -1)
- err(errno, "unveil /dev/urandom");
+ if (unveil("/dev/null", "r") == -1)
+ err(errno, "unveil /dev/null");
#else
- if (pledge("stdio rpath wpath", NULL) == -1)
+ if (pledge("stdio flock rpath wpath", NULL) == -1)
err(errno, "pledge");
#endif
#endif
@@ -646,25 +727,28 @@ main(int argc, char *argv[])
err(errno, "%s: unveil ro", fname);
if (unveil(NULL, NULL) == -1)
err(errno, "unveil block (ro)");
- if (pledge("stdio rpath", NULL) == -1)
+ if (pledge("stdio flock rpath", NULL) == -1)
err(errno, "pledge ro (kill unveil)");
} else {
if (unveil(fname, "rw") == -1)
err(errno, "%s: unveil rw", fname);
if (unveil(NULL, NULL) == -1)
err(errno, "unveil block (rw)");
- if (pledge("stdio rpath wpath", NULL) == -1)
+ if (pledge("stdio flock rpath wpath", NULL) == -1)
err(errno, "pledge rw (kill unveil)");
}
#else
if (command[cmd_index].flags == O_RDONLY) {
- if (pledge("stdio rpath", NULL) == -1)
+ if (pledge("stdio flock rpath", NULL) == -1)
err(errno, "pledge ro");
}
#endif
#endif
- open_dev_urandom();
+#if !defined(HAVE_ARC4RANDOM_BUF) || \
+ (HAVE_ARC4RANDOM_BUF) < 1
+ srand((uint)(time(NULL) ^ getpid()));
+#endif
open_gbe_file();
lock_gbe_file();
@@ -705,7 +789,8 @@ main(int argc, char *argv[])
err(EIO, "%s: bad write", fname);
}
- close_files();
+ if (close_files() == -1)
+ err(EIO, "%s: close", fname);
return EXIT_SUCCESS;
}
@@ -862,11 +947,17 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
err(EINVAL, "Empty string in xstrxcmp");
for (i = 0; i < maxlen; i++) {
- if (a[i] != b[i])
- return (u8)a[i] - (u8)b[i];
+ u8 ac = (u8)a[i];
+ u8 bc = (u8)b[i];
+
+ if (ac == '\0' || bc == '\0') {
+ if (ac == bc)
+ return 0;
+ return ac - bc;
+ }
- if (a[i] == '\0')
- return 0;
+ if (ac != bc)
+ return ac - bc;
}
/*
@@ -882,26 +973,39 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
}
static void
-open_dev_urandom(void)
-{
- rname = newrandom;
- urandom_fd = open(rname, O_RDONLY);
- if (urandom_fd != -1)
- return;
-
- /* fallback on VERY VERY VERY old unix */
- use_prng = 1;
- srand((uint)(time(NULL) ^ getpid()));
-}
-
-static void
open_gbe_file(void)
{
struct stat gbe_st;
+ int flags;
xopen(&gbe_fd, fname,
command[cmd_index].flags | O_BINARY | O_NOFOLLOW, &gbe_st);
+ /* inode will be checked later on write */
+ gbe_dev = gbe_st.st_dev;
+ gbe_ino = gbe_st.st_ino;
+
+ if (gbe_st.st_nlink > 1)
+ fprintf(stderr,
+ "%s: warning: file has %lu hard links\n",
+ fname, (ulong)gbe_st.st_nlink);
+
+ if (gbe_st.st_nlink == 0)
+ err(EIO, "%s: file unlinked while open", fname);
+
+ flags = fcntl(gbe_fd, F_GETFL);
+ if (flags == -1)
+ err(errno, "%s: fcntl(F_GETFL)", fname);
+
+ /*
+ * O_APPEND must not be used, because this
+ * allows POSIX write() to ignore the
+ * current write offset and write at EOF,
+ * which would therefore break pread/pwrite
+ */
+ if (flags & O_APPEND)
+ err(EIO, "%s: O_APPEND flag");
+
gbe_file_size = gbe_st.st_size;
switch (gbe_file_size) {
@@ -943,6 +1047,9 @@ xopen(int *fd_ptr, const char *path, int flags, struct stat *st)
if (!S_ISREG(st->st_mode))
err(errno, "%s: not a regular file", path);
+
+ if (lseek(*fd_ptr, 0, SEEK_CUR) == (off_t)-1)
+ err(errno, "%s: file not seekable", path);
}
static void
@@ -1181,25 +1288,24 @@ hextonum(char ch_s)
return 16; /* invalid character */
}
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
static ushort
rhex(void)
{
+ static u8 num[12];
static size_t n = 0;
- if (use_prng)
- return fallback_rand();
-
if (!n) {
- n = sizeof(rnum);
- if (rw_file_exact(urandom_fd, rnum, n, 0, IO_READ, 0, 1) == -1)
- err(errno, "Randomisation failed");
+ n = 12;
+ arc4random_buf(num, 12);
}
- return (ushort)(rnum[--n] & 0xf);
+ return num[--n] & 0xf;
}
-
+#else
static ushort
-fallback_rand(void)
+rhex(void)
{
struct timeval tv;
ulong mix;
@@ -1252,6 +1358,7 @@ entropy_jitter(void)
return mix;
}
+#endif
static void
write_mac_part(size_t partnum)
@@ -1349,7 +1456,7 @@ cmd_helper_cat(void)
fflush(NULL);
for (p = 0; p < 2; p++) {
- gbe_cat_buf(buf + (p * GBE_PART_SIZE));
+ gbe_cat_buf(buf + (size_t)(p * GBE_PART_SIZE));
for (ff = 0; ff < n; ff++)
gbe_cat_buf(pad);
@@ -1360,7 +1467,8 @@ static void
gbe_cat_buf(u8 *b)
{
if (rw_file_exact(STDOUT_FILENO, b,
- GBE_PART_SIZE, 0, IO_WRITE, 1, 1) < 0)
+ GBE_PART_SIZE, 0, IO_WRITE, LOOP_EAGAIN, LOOP_EINTR,
+ MAX_ZERO_RW_RETRY, OFF_ERR) < 0)
err(errno, "stdout: cat");
}
@@ -1383,6 +1491,9 @@ write_gbe_file(void)
if (fstat(gbe_fd, &gbe_st) == -1)
err(errno, "%s: re-check", fname);
+ if (gbe_st.st_dev != gbe_dev || gbe_st.st_ino != gbe_ino)
+ err(EIO, "%s: file replaced while open", fname);
+
if (gbe_st.st_size != gbe_file_size)
err(errno, "%s: file size changed before write", fname);
@@ -1558,6 +1669,7 @@ check_written_part(size_t p)
u8 *mem_offset;
off_t file_offset;
u8 *buf_restore;
+ struct stat st;
if (!part_modified[p])
return;
@@ -1568,6 +1680,14 @@ check_written_part(size_t p)
mem_offset = gbe_mem_offset(p, "pwrite");
file_offset = (off_t)gbe_file_offset(p, "pwrite");
+ memset(pad, 0xff, sizeof(pad));
+
+ if (fstat(gbe_fd, &st) == -1)
+ err(errno, "%s: fstat (post-write)", fname);
+
+ if (st.st_dev != gbe_dev || st.st_ino != gbe_ino)
+ err(EIO, "%s: file changed during write", fname);
+
r = rw_gbe_file_exact(gbe_fd, pad,
gbe_rw_size, file_offset, IO_PREAD);
@@ -1578,6 +1698,10 @@ check_written_part(size_t p)
else if (memcmp(mem_offset, pad, gbe_rw_size) != 0)
rw_check_bad_part[p] = io_err_gbe = 1;
+ if (rw_check_err_read[p] ||
+ rw_check_partial_read[p])
+ return;
+
/*
* We only load one part on-file, into memory but
* always at offset zero, for post-write checks.
@@ -1614,10 +1738,15 @@ report_io_err_rw(void)
"%s: pwrite: corrupt write on p%lu\n",
fname, (ulong)p);
- /*
- * so that we can re-use main checksumming features
- * correct part to read always part 0
- */
+ if (rw_check_err_read[p] ||
+ rw_check_partial_read[p]) {
+ fprintf(stderr,
+ "%s: p%lu: skipped checksum verification "
+ "(because read failed)\n",
+ fname, (ulong)p);
+
+ continue;
+ }
fprintf(stderr, "%s: ", fname);
@@ -1648,7 +1777,7 @@ gbe_mem_offset(size_t p, const char *f_op)
off_t gbe_off = gbe_x_offset(p, f_op, "mem",
GBE_PART_SIZE, GBE_FILE_SIZE);
- return (u8 *)(buf + gbe_off);
+ return (u8 *)(buf + (size_t)gbe_off);
}
/*
@@ -1692,13 +1821,23 @@ static ssize_t
rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type)
{
- if (mem == NULL)
- goto err_rw_gbe_file_exact;
+ size_t mem_addr;
+ size_t buf_addr;
+ ssize_t r;
- if (mem != (void *)pad
- && mem != (void *)rnum
- && (mem < buf || mem >= (buf + GBE_FILE_SIZE)))
- goto err_rw_gbe_file_exact;
+ if (io_args(fd, mem, nrw, off, rw_type) == -1)
+ return -1;
+
+ mem_addr = (size_t)(void *)mem;
+ buf_addr = (size_t)(void *)buf;
+
+ if (mem != (void *)pad) {
+ if (mem_addr < buf_addr)
+ goto err_rw_gbe_file_exact;
+
+ if ((mem_addr - buf_addr) >= (size_t)GBE_FILE_SIZE)
+ goto err_rw_gbe_file_exact;
+ }
if (off < 0 || off >= gbe_file_size)
goto err_rw_gbe_file_exact;
@@ -1706,10 +1845,14 @@ rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
if (nrw > (size_t)(gbe_file_size - off))
goto err_rw_gbe_file_exact;
- if (nrw > GBE_PART_SIZE)
+ if (nrw > (size_t)GBE_PART_SIZE)
goto err_rw_gbe_file_exact;
- return rw_file_exact(fd, mem, nrw, off, rw_type, 0, 1);
+ r = rw_file_exact(fd, mem, nrw, off, rw_type,
+ NO_LOOP_EAGAIN, LOOP_EINTR, MAX_ZERO_RW_RETRY,
+ OFF_ERR);
+
+ return rw_over_nrw(r, nrw);
err_rw_gbe_file_exact:
errno = EIO;
@@ -1717,92 +1860,89 @@ err_rw_gbe_file_exact:
}
/*
- * Read or write the exact contents of a file,
- * along with a buffer, (if applicable) offset,
- * and number of bytes to be read. It unifies
- * the functionality of read(), pread(), write()
- * and pwrite(), with retry-on-EINTR and also
- * prevents infinite loop on zero-reads.
- *
- * The pread() and pwrite() functionality are
- * provided by yet another portable function,
- * prw() - see notes below.
+ * Safe I/O functions wrapping around
+ * read(), write() and providing a portable
+ * analog of both pread() and pwrite().
+ * These functions are designed for maximum
+ * robustness, checking NULL inputs, overflowed
+ * outputs, and all kinds of errors that the
+ * standard libc functions don't.
*
- * This must only be used on files. It cannot
- * be used on sockets or pipes, because 0-byte
- * reads are treated like fatal errors. This
- * means that EOF is also considered fatal.
+ * Looping on EINTR and EAGAIN is supported.
+ * EINTR/EAGAIN looping is done indefinitely.
*/
-static ssize_t
-rw_file_exact(int fd, u8 *mem, size_t nrw,
- off_t off, int rw_type, int loop_eagain,
- int loop_eintr)
-{
- ssize_t rv;
- size_t rc;
-
- for (rc = 0, rv = 0; rc < nrw; ) {
- if ((rv = rw_file_once(fd, mem, nrw, off, rw_type, rc,
- loop_eagain, loop_eintr)) < 0)
- return -1;
-
- /* rw_file_once never returns
- zero, but it's still logically
- incorrect not to handle it here */
-
- if (rv == 0) {
- errno = EIO;
- return -1;
- }
-
- rc += (size_t)rv;
- }
-
- return rc;
-}
/*
- * Helper function for rw_file_exact, that
- * also does extra error handling pertaining
- * to GbE file offsets.
+ * rw_file_exact() - Read perfectly or die
*
- * May not return all requested bytes (nrw).
- * Use rw_file_exact for guaranteed length.
+ * Read or write, and insist on a complete
+ * transfer; e.g. if 100 bytes are requested,
+ * this MUST return 100.
*
* This function will never return zero.
* It will only return below (error),
* or above (success). On error, -1 is
* returned and errno is set accordingly.
+ *
+ * Zero-byte returns are not allowed.
+ * It will re-spin a finite number of
+ * times upon zero-return, to recover,
+ * otherwise it will return an error.
*/
static ssize_t
-rw_file_once(int fd, u8 *mem, size_t nrw,
- off_t off, int rw_type, size_t rc,
- int loop_eagain, int loop_eintr)
+rw_file_exact(int fd, u8 *mem, size_t nrw,
+ off_t off, int rw_type, int loop_eagain,
+ int loop_eintr, size_t max_retries,
+ int off_reset)
{
- ssize_t rv;
+ ssize_t rv = 0;
+ ssize_t rc = 0;
size_t retries_on_zero = 0;
- size_t max_retries = 10;
+ off_t off_cur;
+ size_t nrw_cur;
+ void *mem_cur;
- if (mem == NULL)
- goto err_rw_file_once;
+ if (io_args(fd, mem, nrw, off, rw_type) == -1)
+ return -1;
-read_again:
- rv = prw(fd, mem + rc, nrw - rc, off + rc, rw_type,
- loop_eagain, loop_eintr);
+ while (1) {
+
+ /* Prevent theoretical overflow */
+ if (rv >= 0 && (size_t)rv > (nrw - rc))
+ goto err_rw_file_exact;
- if (rv < 0)
- return -1;
+ rc += rv;
+ if ((size_t)rc >= nrw)
+ break;
+
+ mem_cur = (void *)(mem + (size_t)rc);
+ nrw_cur = (size_t)(nrw - (size_t)rc);
+ if (off < 0)
+ goto err_rw_file_exact;
+ off_cur = (off_t)((size_t)off + (size_t)rc);
+
+ rv = prw(fd, mem_cur, nrw_cur, off_cur,
+ rw_type, loop_eagain, loop_eintr,
+ off_reset);
+
+ if (rv < 0)
+ return -1;
- if ((size_t)rv > (nrw - rc))/* don't overflow */
- goto err_rw_file_once;
+ if (rv == 0) {
+ if (retries_on_zero++ < max_retries)
+ continue;
+ goto err_rw_file_exact;
+ }
- if (rv != 0)
- return rv;
+ retries_on_zero = 0;
+ }
- if (retries_on_zero++ < max_retries)
- goto read_again;
+ if ((size_t)rc != nrw)
+ goto err_rw_file_exact;
-err_rw_file_once:
+ return rw_over_nrw(rc, nrw);
+
+err_rw_file_exact:
errno = EIO;
return -1;
}
@@ -1818,6 +1958,9 @@ err_rw_file_once:
* This limitation is acceptable, since nvmutil is
* single-threaded. Portability is the main goal.
*
+ * If you need real pwrite/pread, just compile
+ * with flag: HAVE_REAL_PREAD_PWRITE=1
+ *
* A fallback is provided for regular read/write.
* rw_type can be IO_READ, IO_WRITE, IO_PREAD
* or IO_PWRITE
@@ -1830,31 +1973,58 @@ err_rw_file_once:
* also mitigates a few theoretical libc bugs.
* It is designed for extremely safe single-threaded
* I/O on applications that need it.
+ *
+ * NOTE: If you use loop_eagain (1), you enable wait
+ * loop on EAGAIN. Beware if using this on a non-blocking
+ * pipe (it could spin indefinitely).
+ *
+ * off_reset: if zero, and using the fallback pread/pwrite
+ * analogs, we check whether the file offset changed,
+ * which would indicate that another thread changed
+ * it, and return an error without resetting the
+ * offset - this allows that thread to keep
+ * running, and the caller may then choose to
+ * exit the whole program.
+ * if not zero:
+ * we reset the offset once, continue, and hope for the best.
*/
static ssize_t
prw(int fd, void *mem, size_t nrw,
off_t off, int rw_type,
- int loop_eagain, int loop_eintr)
+ int loop_eagain, int loop_eintr,
+ int off_reset)
{
- off_t off_orig;
ssize_t r;
- int saved_errno;
- int flags;
int positional_rw;
+ struct stat st;
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+ int saved_errno;
+ off_t verified;
+ off_t off_orig;
+ off_t off_last;
+#endif
- if (mem == NULL)
- goto err_prw;
-
- if (fd < 0
- || off < 0
- || !nrw /* prevent zero read request */
- || nrw > (size_t)SSIZE_MAX /* prevent overflow */
- || (uint)rw_type > IO_PWRITE)
- goto err_prw;
+ if (io_args(fd, mem, nrw, off, rw_type) == -1)
+ return -1;
r = -1;
+ /* Programs like cat can use this,
+ so we only check if it's a normal
+ file if not looping EAGAIN */
+ if (!loop_eagain) {
+ /*
+ * Checking on every run of prw()
+ * is expensive if called many
+ * times, but is defensive in
+ * case the status changes.
+ */
+ if (check_file(fd, &st) == -1)
+ return -1;
+ }
+
if (rw_type >= IO_PREAD)
positional_rw = 1; /* pread/pwrite */
else
@@ -1863,10 +2033,21 @@ prw(int fd, void *mem, size_t nrw,
try_rw_again:
if (!positional_rw) {
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+real_pread_pwrite:
+#endif
if (rw_type == IO_WRITE)
r = write(fd, mem, nrw);
else if (rw_type == IO_READ)
r = read(fd, mem, nrw);
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ else if (rw_type == IO_PWRITE)
+ r = pwrite(fd, mem, nrw, off);
+ else if (rw_type == IO_PREAD)
+ r = pread(fd, mem, nrw, off);
+#endif
if (r == -1 && (errno == try_err(loop_eintr, EINTR)
|| errno == try_err(loop_eagain, EAGAIN)))
@@ -1875,59 +2056,165 @@ try_rw_again:
return rw_over_nrw(r, nrw);
}
- flags = fcntl(fd, F_GETFL);
- if (flags == -1)
- return -1;
-
- /*
- * O_APPEND must not be used, because this
- * allows POSIX write() to ignore the
- * current write offset and write at EOF,
- * which would therefore break pread/pwrite
- */
- if (flags & O_APPEND)
- goto err_prw;
-
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ goto real_pread_pwrite;
+#else
if ((off_orig = lseek_loop(fd, (off_t)0, SEEK_CUR,
- loop_eagain, loop_eintr)) == (off_t)-1)
+ loop_eagain, loop_eintr)) == (off_t)-1) {
r = -1;
- else if (lseek_loop(fd, off, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1)
+ } else if (lseek_loop(fd, off, SEEK_SET,
+ loop_eagain, loop_eintr) == (off_t)-1) {
r = -1;
+ } else {
+ verified = lseek_loop(fd, (off_t)0, SEEK_CUR,
+ loop_eagain, loop_eintr);
- do {
- if (rw_type == IO_PREAD)
- r = read(fd, mem, nrw);
- else if (rw_type == IO_PWRITE)
- r = write(fd, mem, nrw);
-
- r = rw_over_nrw(r, nrw);
- } while (r == -1 &&
- (errno == try_err(loop_eintr, EINTR)
- || errno == try_err(loop_eagain, EAGAIN)));
+ /*
+ * Partial thread-safety: detect
+ * if the offset differs from what
+ * we just set. If it does, then
+ * another thread may have changed
+ * it. The offset is set again only
+ * if off_reset is non-zero.
+ *
+ * We do this *once*, on the theory
+ * that nothing is touching it now.
+ */
+ if (off_reset && off != verified)
+ lseek_loop(fd, off, SEEK_SET,
+ loop_eagain, loop_eintr);
+
+ do {
+ /*
+ * Verify again before I/O
+ * (even with OFF_ERR)
+ *
+ * This implements the first check
+ * even with OFF_ERR, but without
+ * the recovery. On OFF_RESET, if
+ * the check fails again, then we
+ * know something else is touching
+ * the file, so it's best that we
+ * leave it alone and return an error.
+ *
+ * In other words, OFF_RESET only
+ * tolerates one change. Any more
+ * will cause an error, including
+ * on each EINTR/EAGAIN re-spin.
+ */
+ verified = lseek_loop(fd, (off_t)0, SEEK_CUR,
+ loop_eagain, loop_eintr);
+
+ if (off != verified)
+ goto err_prw;
+
+ if (rw_type == IO_PREAD)
+ r = read(fd, mem, nrw);
+ else if (rw_type == IO_PWRITE)
+ r = write(fd, mem, nrw);
+
+ if (rw_over_nrw(r, nrw) == -1) {
+ errno = EIO;
+ break;
+ }
+
+ } while (r == -1 &&
+ (errno == try_err(loop_eintr, EINTR)
+ || errno == try_err(loop_eagain, EAGAIN)));
+ }
saved_errno = errno;
- if (lseek_loop(fd, off_orig, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1) {
+
+ off_last = lseek_loop(fd, off_orig, SEEK_SET,
+ loop_eagain, loop_eintr);
+
+ if (off_last != off_orig) {
errno = saved_errno;
- return -1;
+ goto err_prw;
}
+
errno = saved_errno;
return rw_over_nrw(r, nrw);
+#endif
err_prw:
errno = EIO;
return -1;
}
+/*
+ * io_args() - validate arguments shared by the I/O helpers
+ *
+ * fd:      file descriptor; must not be negative
+ * mem:     buffer to read into or write from; must not be NULL
+ * nrw:     byte count; must be non-zero and no more than SSIZE_MAX
+ * off:     file offset; must not be negative
+ * rw_type: one of IO_READ, IO_WRITE, IO_PREAD or IO_PWRITE
+ *
+ * Returns 0 if every argument is acceptable. Otherwise
+ * returns -1, with errno set to EIO.
+ */
+static int
+io_args(int fd, void *mem, size_t nrw,
+    off_t off, int rw_type)
+{
+	/* no buffer to read into / write from */
+	if (mem == NULL)
+		goto err_io_args;
+
+	/* uninitialised fd */
+	if (fd < 0)
+		goto err_io_args;
+
+	/* negative offset */
+	if (off < 0)
+		goto err_io_args;
+
+	/* prevent zero-byte rw */
+	if (!nrw)
+		goto err_io_args;
+
+	/* the result must be representable as ssize_t */
+	if (nrw > (size_t)SSIZE_MAX)
+		goto err_io_args;
+
+	/* offset plus length must not wrap around */
+	if (((size_t)off + nrw) < (size_t)off)
+		goto err_io_args;
+
+	/*
+	 * Reject values on both sides of the valid
+	 * range. A negative rw_type matches none of
+	 * the I/O types, so no I/O would be done for
+	 * it and the result would be meaningless.
+	 */
+	if (rw_type < IO_READ || rw_type > IO_PWRITE)
+		goto err_io_args;
+
+	return 0;
+
+err_io_args:
+	errno = EIO;
+	return -1;
+}
+
+/*
+ * check_file() - confirm that fd refers to a regular file
+ *
+ * The result of fstat() is stored in *st. Returns 0 if
+ * fstat() succeeded and the file is a regular file.
+ * Otherwise returns -1, with errno set to EIO.
+ */
+static int
+check_file(int fd, struct stat *st)
+{
+	if (fstat(fd, st) != -1 && S_ISREG(st->st_mode))
+		return 0;
+
+	errno = EIO;
+	return -1;
+}
+
/*
+ * Check overflows caused by buggy libc.
+ *
* POSIX can say whatever it wants.
* specification != implementation
*/
-static int
+static ssize_t
rw_over_nrw(ssize_t r, size_t nrw)
{
+ /*
+ * If a byte length of zero
+ * was requested, that is
+ * clearly a bug. No way.
+ */
+ if (!nrw)
+ goto err_rw_over_nrw;
+
if (r == -1)
return r;
@@ -1963,6 +2250,13 @@ err_rw_over_nrw:
return -1;
}
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+/*
+ * lseek_loop() does lseek() but optionally
+ * on an EINTR/EAGAIN wait loop. Used by prw()
+ * for setting offsets for positional I/O.
+ */
static off_t
lseek_loop(int fd, off_t off, int whence,
int loop_eagain, int loop_eintr)
@@ -1977,7 +2271,14 @@ lseek_loop(int fd, off_t off, int whence,
return old;
}
+#endif
+/*
+ * If a given error loop is enabled,
+ * e.g. EINTR or EAGAIN, an I/O operation
+ * is retried for as long as it returns -1
+ * with errno set to that error, e.g. EINTR.
+ */
static int
try_err(int loop_err, int errval)
{
@@ -1995,12 +2296,10 @@ err(int nvm_errval, const char *msg, ...)
{
va_list args;
- if (nvm_errval >= 0) {
- close_files();
+ if (errno == 0)
errno = nvm_errval;
- }
- if (errno <= 0)
- errno = ECANCELED;
+
+ (void)close_files();
fprintf(stderr, "%s: ", getnvmprogname());
@@ -2014,20 +2313,25 @@ err(int nvm_errval, const char *msg, ...)
exit(EXIT_FAILURE);
}
-static void
+static int
close_files(void)
{
+ int close_err_gbe = 0;
+ int saved_errno = errno;
+
if (gbe_fd > -1) {
if (close(gbe_fd) == -1)
- err(-1, "%s: close failed", fname);
+ close_err_gbe = errno;
gbe_fd = -1;
}
- if (urandom_fd > -1) {
- if (close(urandom_fd) == -1)
- err(-1, "%s: close failed", rname);
- urandom_fd = -1;
- }
+ if (saved_errno)
+ errno = saved_errno;
+
+ if (close_err_gbe)
+ return -1;
+
+ return 0;
}
static const char *