summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r--util/nvmutil/nvmutil.c558
1 files changed, 380 insertions, 178 deletions
diff --git a/util/nvmutil/nvmutil.c b/util/nvmutil/nvmutil.c
index e9046dc6..4a550e71 100644
--- a/util/nvmutil/nvmutil.c
+++ b/util/nvmutil/nvmutil.c
@@ -16,6 +16,67 @@
*/
/*
+ * NOTE: older Linux lacked arc4random_buf();
+ * it was added in glibc 2.36. Pass HAVE_ARC4RANDOM_BUF=0
+ * at build time if you need old Linux / another libc.
+ */
+#if defined(__OpenBSD__) || defined(__FreeBSD__) || \
+ defined(__NetBSD__) || defined(__APPLE__) || \
+ defined(__linux__)
+#ifndef HAVE_ARC4RANDOM_BUF
+#define HAVE_ARC4RANDOM_BUF 1
+#endif
+#endif
+
+/*
+ * I/O config (build-time)
+ *
+ * Regarding:
+ * Retries on zero-return.
+ *
+ * 5 retries is generous,
+ * but also conservative.
+ * This is enough for e.g.
+ * slow USB flash drives,
+ * busy NFS servers, etc.
+ * Any more is too much
+ * and not of much benefit.
+ *
+ * 3-5 will tolerate buggy
+ * USB drives for example,
+ * but won't spin as long
+ * on really buggy and slow
+ * networks e.g. slow NFS.
+ *
+ * At least 3-5 recommended.
+ * Pass this at build time.
+ */
+#ifndef MAX_ZERO_RW_RETRY
+#define MAX_ZERO_RW_RETRY 5
+#endif
+/*
+ * 0: portable pread/pwrite
+ * 1: real pread/pwrite (thread-safe)
+ * Pass this at build-time
+ */
+#ifndef HAVE_REAL_PREAD_PWRITE
+#define HAVE_REAL_PREAD_PWRITE 0
+#endif
+/*
+ * Configure whether to wait on
+ * EINTR on files, or EAGAIN on
+ * cmd cat (stdout).
+ *
+ * Pass these at build time.
+ */
+#ifndef LOOP_EAGAIN
+#define LOOP_EAGAIN 1
+#endif
+#ifndef LOOP_EINTR
+#define LOOP_EINTR 1
+#endif
+
+/*
* Major TODO: split this into multiple files.
* This program has become quite large now, mostly
* due to all the extra sanity checks / portability.
@@ -157,6 +218,7 @@ also consider:
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -184,6 +246,12 @@ typedef char static_assert_int_ge_32[(sizeof(int) >= 4) ? 1 : -1];
typedef char static_assert_twos_complement[
((-1 & 3) == 3) ? 1 : -1
];
+typedef char assert_ulong_ptr[
+ (sizeof(ulong) >= sizeof(void *)) ? 1 : -1
+];
+typedef char assert_size_t_ptr[
+ (sizeof(size_t) >= sizeof(void *)) ? 1 : -1
+];
/*
* We set _FILE_OFFSET_BITS 64, but we only handle
@@ -247,12 +315,7 @@ static int xstrxcmp(const char *a, const char *b, size_t maxlen);
/*
* Prep files for reading
- *
- * Portability: /dev/urandom used
- * on Linux / old Unix, whereas
- * arc4random is used on BSD/MacOS.
*/
-static void open_dev_urandom(void);
static void open_gbe_file(void);
static void lock_gbe_file(void);
static void xopen(int *fd, const char *path, int flags, struct stat *st);
@@ -286,8 +349,10 @@ static void set_mac_nib(size_t mac_str_pos,
size_t mac_byte_pos, size_t mac_nib_pos);
static ushort hextonum(char ch_s);
static ushort rhex(void);
-static ushort fallback_rand(void);
+#if !defined(HAVE_ARC4RANDOM_BUF) || \
+ (HAVE_ARC4RANDOM_BUF) < 1
static ulong entropy_jitter(void);
+#endif
static void write_mac_part(size_t partnum);
/*
@@ -341,22 +406,24 @@ static off_t gbe_x_offset(size_t part, const char *f_op,
static ssize_t rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type);
static ssize_t rw_file_exact(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, int loop_eagain, int loop_eintr);
-static ssize_t rw_file_once(int fd, u8 *mem, size_t len,
- off_t off, int rw_type, size_t rc, int loop_eagain,
- int loop_eintr);
+ off_t off, int rw_type, int loop_eagain, int loop_eintr,
+ size_t max_retries);
static ssize_t prw(int fd, void *mem, size_t nrw,
off_t off, int rw_type, int loop_eagain, int loop_eintr);
-static int rw_over_nrw(ssize_t r, size_t nrw);
+static int check_file(int fd, struct stat *st);
+static ssize_t rw_over_nrw(ssize_t r, size_t nrw);
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
static off_t lseek_loop(int fd, off_t off,
int whence, int loop_eagain, int loop_eintr);
+#endif
static int try_err(int loop_err, int errval);
/*
* Error handling and cleanup
*/
+static int close_files(void);
static void err(int nvm_errval, const char *msg, ...);
-static void close_files(void);
static const char *getnvmprogname(void);
static void usage(int usage_exit);
@@ -388,18 +455,12 @@ static void usage(int usage_exit);
#define NVM_WORDS (NVM_SIZE >> 1)
#define NVM_CHECKSUM_WORD (NVM_WORDS - 1)
-#define NUM_RANDOM_BYTES 12
-static u8 rnum[NUM_RANDOM_BYTES];
-
/*
* Portable macro based on BSD nitems.
* Used to count the number of commands (see below).
*/
#define items(x) (sizeof((x)) / sizeof((x)[0]))
-static const char newrandom[] = "/dev/urandom";
-static const char *rname = NULL;
-
/*
* GbE files can be 8KB, 16KB or 128KB,
* but we only need the two 4KB parts
@@ -416,7 +477,6 @@ static u8 *buf = real_buf;
static ushort mac_buf[3];
static off_t gbe_file_size;
-static int urandom_fd = -1;
static int gbe_fd = -1;
static size_t part;
static u8 part_modified[2];
@@ -444,6 +504,9 @@ static const char *argv0;
#define ARGC_3 3
#define ARGC_4 4
+#define NO_LOOP_EAGAIN 0
+#define NO_LOOP_EINTR 0
+
enum {
IO_READ,
IO_WRITE,
@@ -578,8 +641,6 @@ typedef char assert_read[(IO_READ==0)?1:-1];
typedef char assert_write[(IO_WRITE==1)?1:-1];
typedef char assert_pread[(IO_PREAD==2)?1:-1];
typedef char assert_pwrite[(IO_PWRITE==3)?1:-1];
-typedef char assert_rand_byte[(NUM_RANDOM_BYTES>0)?1:-1];
-typedef char assert_rand_len[(NUM_RANDOM_BYTES<NVM_SIZE)?1:-1];
/* commands */
typedef char assert_cmd_dump[(CMD_DUMP==0)?1:-1];
typedef char assert_cmd_setmac[(CMD_SETMAC==1)?1:-1];
@@ -603,8 +664,10 @@ typedef char bool_skip_checksum_write[(SKIP_CHECKSUM_WRITE==0)?1:-1];
typedef char bool_checksum_write[(CHECKSUM_WRITE==1)?1:-1];
typedef char bool_no_invert[(NO_INVERT==0)?1:-1];
typedef char bool_part_invert[(PART_INVERT==1)?1:-1];
-
-static int use_prng = 0;
+typedef char bool_loop_eintr[(LOOP_EINTR==1||LOOP_EINTR==0)?1:-1];
+typedef char bool_loop_eagain[(LOOP_EAGAIN==1||LOOP_EAGAIN==0)?1:-1];
+typedef char bool_no_loop_eintr[(NO_LOOP_EINTR==0)?1:-1];
+typedef char bool_no_loop_eagain[(NO_LOOP_EAGAIN==0)?1:-1];
static int io_err_gbe = 0;
static int rw_check_err_read[] = {0, 0};
@@ -613,6 +676,14 @@ static int rw_check_bad_part[] = {0, 0};
static int post_rw_checksum[] = {0, 0};
+static dev_t gbe_dev;
+static ino_t gbe_ino;
+
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
+void arc4random_buf(void *buf, size_t n);
+#endif
+
int
main(int argc, char *argv[])
{
@@ -626,8 +697,8 @@ main(int argc, char *argv[])
#ifdef NVMUTIL_UNVEIL
if (pledge("stdio rpath wpath unveil", NULL) == -1)
err(errno, "pledge");
- if (unveil("/dev/urandom", "r") == -1)
- err(errno, "unveil /dev/urandom");
+ if (unveil("/dev/null", "r") == -1)
+ err(errno, "unveil /dev/null");
#else
if (pledge("stdio rpath wpath", NULL) == -1)
err(errno, "pledge");
@@ -664,7 +735,7 @@ main(int argc, char *argv[])
#endif
#endif
- open_dev_urandom();
+ srand((uint)(time(NULL) ^ getpid()));
open_gbe_file();
lock_gbe_file();
@@ -705,7 +776,8 @@ main(int argc, char *argv[])
err(EIO, "%s: bad write", fname);
}
- close_files();
+ if (close_files() == -1)
+ err(EIO, "%s: close", fname);
return EXIT_SUCCESS;
}
@@ -882,26 +954,39 @@ xstrxcmp(const char *a, const char *b, size_t maxlen)
}
static void
-open_dev_urandom(void)
-{
- rname = newrandom;
- urandom_fd = open(rname, O_RDONLY);
- if (urandom_fd != -1)
- return;
-
- /* fallback on VERY VERY VERY old unix */
- use_prng = 1;
- srand((uint)(time(NULL) ^ getpid()));
-}
-
-static void
open_gbe_file(void)
{
struct stat gbe_st;
+ int flags;
xopen(&gbe_fd, fname,
command[cmd_index].flags | O_BINARY | O_NOFOLLOW, &gbe_st);
+ /* inode will be checked later on write */
+ gbe_dev = gbe_st.st_dev;
+ gbe_ino = gbe_st.st_ino;
+
+ if (gbe_st.st_nlink > 1)
+ fprintf(stderr,
+ "%s: warning: file has %lu hard links\n",
+ fname, (ulong)gbe_st.st_nlink);
+
+ if (gbe_st.st_nlink == 0)
+ err(EIO, "%s: file unlinked while open", fname);
+
+ flags = fcntl(gbe_fd, F_GETFL);
+ if (flags == -1)
+ err(errno, "%s: fcntl(F_GETFL)", fname);
+
+ /*
+ * O_APPEND must not be used, because this
+ * allows POSIX write() to ignore the
+ * current write offset and write at EOF,
+ * which would therefore break pread/pwrite
+ */
+ if (flags & O_APPEND)
+ err(EIO, "%s: O_APPEND flag", fname);
+
gbe_file_size = gbe_st.st_size;
switch (gbe_file_size) {
@@ -943,6 +1028,9 @@ xopen(int *fd_ptr, const char *path, int flags, struct stat *st)
if (!S_ISREG(st->st_mode))
err(errno, "%s: not a regular file", path);
+
+ if (lseek(*fd_ptr, 0, SEEK_CUR) == (off_t)-1)
+ err(errno, "%s: file not seekable", path);
}
static void
@@ -1181,25 +1269,24 @@ hextonum(char ch_s)
return 16; /* invalid character */
}
+#if defined(HAVE_ARC4RANDOM_BUF) && \
+ (HAVE_ARC4RANDOM_BUF) > 0
static ushort
rhex(void)
{
+ static u8 num[12];
static size_t n = 0;
- if (use_prng)
- return fallback_rand();
-
if (!n) {
- n = sizeof(rnum);
- if (rw_file_exact(urandom_fd, rnum, n, 0, IO_READ, 0, 1) == -1)
- err(errno, "Randomisation failed");
+ n = 12;
+ arc4random_buf(num, 12);
}
- return (ushort)(rnum[--n] & 0xf);
+ return num[--n] & 0xf;
}
-
+#else
static ushort
-fallback_rand(void)
+rhex(void)
{
struct timeval tv;
ulong mix;
@@ -1252,6 +1339,7 @@ entropy_jitter(void)
return mix;
}
+#endif
static void
write_mac_part(size_t partnum)
@@ -1349,7 +1437,7 @@ cmd_helper_cat(void)
fflush(NULL);
for (p = 0; p < 2; p++) {
- gbe_cat_buf(buf + (p * GBE_PART_SIZE));
+ gbe_cat_buf(buf + (size_t)(p * GBE_PART_SIZE));
for (ff = 0; ff < n; ff++)
gbe_cat_buf(pad);
@@ -1360,7 +1448,8 @@ static void
gbe_cat_buf(u8 *b)
{
if (rw_file_exact(STDOUT_FILENO, b,
- GBE_PART_SIZE, 0, IO_WRITE, 1, 1) < 0)
+ GBE_PART_SIZE, 0, IO_WRITE, LOOP_EAGAIN, LOOP_EINTR,
+ MAX_ZERO_RW_RETRY) < 0)
err(errno, "stdout: cat");
}
@@ -1383,6 +1472,9 @@ write_gbe_file(void)
if (fstat(gbe_fd, &gbe_st) == -1)
err(errno, "%s: re-check", fname);
+ if (gbe_st.st_dev != gbe_dev || gbe_st.st_ino != gbe_ino)
+ err(EIO, "%s: file replaced while open", fname);
+
if (gbe_st.st_size != gbe_file_size)
err(errno, "%s: file size changed before write", fname);
@@ -1558,6 +1650,7 @@ check_written_part(size_t p)
u8 *mem_offset;
off_t file_offset;
u8 *buf_restore;
+ struct stat st;
if (!part_modified[p])
return;
@@ -1568,6 +1661,14 @@ check_written_part(size_t p)
mem_offset = gbe_mem_offset(p, "pwrite");
file_offset = (off_t)gbe_file_offset(p, "pwrite");
+ memset(pad, 0xff, sizeof(pad));
+
+ if (fstat(gbe_fd, &st) == -1)
+ err(errno, "%s: fstat (post-write)", fname);
+
+ if (st.st_dev != gbe_dev || st.st_ino != gbe_ino)
+ err(EIO, "%s: file changed during write", fname);
+
r = rw_gbe_file_exact(gbe_fd, pad,
gbe_rw_size, file_offset, IO_PREAD);
@@ -1578,6 +1679,10 @@ check_written_part(size_t p)
else if (memcmp(mem_offset, pad, gbe_rw_size) != 0)
rw_check_bad_part[p] = io_err_gbe = 1;
+ if (rw_check_err_read[p] ||
+ rw_check_partial_read[p])
+ return;
+
/*
* We only load one part on-file, into memory but
* always at offset zero, for post-write checks.
@@ -1614,10 +1719,15 @@ report_io_err_rw(void)
"%s: pwrite: corrupt write on p%lu\n",
fname, (ulong)p);
- /*
- * so that we can re-use main checksumming features
- * correct part to read always part 0
- */
+ if (rw_check_err_read[p] ||
+ rw_check_partial_read[p]) {
+ fprintf(stderr,
+ "%s: p%lu: skipped checksum verification "
+ "(because read failed)\n",
+ fname, (ulong)p);
+
+ continue;
+ }
fprintf(stderr, "%s: ", fname);
@@ -1648,7 +1758,7 @@ gbe_mem_offset(size_t p, const char *f_op)
off_t gbe_off = gbe_x_offset(p, f_op, "mem",
GBE_PART_SIZE, GBE_FILE_SIZE);
- return (u8 *)(buf + gbe_off);
+ return (u8 *)(buf + (size_t)gbe_off);
}
/*
@@ -1692,13 +1802,20 @@ static ssize_t
rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
off_t off, int rw_type)
{
+ size_t mem_addr;
+ size_t buf_addr;
+ size_t buf_end;
+
if (mem == NULL)
goto err_rw_gbe_file_exact;
- if (mem != (void *)pad
- && mem != (void *)rnum
- && (mem < buf || mem >= (buf + GBE_FILE_SIZE)))
- goto err_rw_gbe_file_exact;
+ mem_addr = (size_t)(void *)mem;
+ buf_addr = (size_t)(void *)buf;
+ buf_end = buf_addr + (size_t)GBE_FILE_SIZE;
+
+ if (mem != (void *)pad &&
+ (mem_addr < buf_addr || mem_addr >= buf_end))
+ goto err_rw_gbe_file_exact;
if (off < 0 || off >= gbe_file_size)
goto err_rw_gbe_file_exact;
@@ -1709,7 +1826,8 @@ rw_gbe_file_exact(int fd, u8 *mem, size_t nrw,
if (nrw > GBE_PART_SIZE)
goto err_rw_gbe_file_exact;
- return rw_file_exact(fd, mem, nrw, off, rw_type, 0, 1);
+ return rw_file_exact(fd, mem, nrw, off, rw_type,
+ NO_LOOP_EAGAIN, LOOP_EINTR, MAX_ZERO_RW_RETRY);
err_rw_gbe_file_exact:
errno = EIO;
@@ -1717,92 +1835,78 @@ err_rw_gbe_file_exact:
}
/*
- * Read or write the exact contents of a file,
- * along with a buffer, (if applicable) offset,
- * and number of bytes to be read. It unifies
- * the functionality of read(), pread(), write()
- * and pwrite(), with retry-on-EINTR and also
- * prevents infinite loop on zero-reads.
- *
- * The pread() and pwrite() functionality are
- * provided by yet another portable function,
- * prw() - see notes below.
+ * Safe I/O functions wrapping around
+ * read(), write() and providing a portable
+ * analog of both pread() and pwrite().
+ * These functions are designed for maximum
+ * robustness, checking NULL inputs, overflowed
+ * outputs, and all kinds of errors that the
+ * standard libc functions don't.
*
- * This must only be used on files. It cannot
- * be used on sockets or pipes, because 0-byte
- * reads are treated like fatal errors. This
- * means that EOF is also considered fatal.
+ * Looping on EINTR and EAGAIN is supported.
+ * EINTR/EAGAIN looping is done indefinitely.
*/
-static ssize_t
-rw_file_exact(int fd, u8 *mem, size_t nrw,
- off_t off, int rw_type, int loop_eagain,
- int loop_eintr)
-{
- ssize_t rv;
- size_t rc;
-
- for (rc = 0, rv = 0; rc < nrw; ) {
- if ((rv = rw_file_once(fd, mem, nrw, off, rw_type, rc,
- loop_eagain, loop_eintr)) < 0)
- return -1;
-
- /* rw_file_once never returns
- zero, but it's still logically
- incorrect not to handle it here */
-
- if (rv == 0) {
- errno = EIO;
- return -1;
- }
-
- rc += (size_t)rv;
- }
-
- return rc;
-}
/*
- * Helper function for rw_file_exact, that
- * also does extra error handling pertaining
- * to GbE file offsets.
+ * rw_file_exact() - Read/write exactly nrw bytes, or fail
*
- * May not return all requested bytes (nrw).
- * Use rw_file_exact for guaranteed length.
+ * Read/write, and insist on a complete
+ * transfer; e.g. if 100 bytes are
+ * requested, this MUST return 100.
*
* This function will never return zero.
* It will only return below (error),
* or above (success). On error, -1 is
* returned and errno is set accordingly.
+ *
+ * Zero-byte returns are not allowed.
+ * It will re-spin a finite number of
+ * times upon zero-return, to recover,
+ * otherwise it will return an error.
*/
static ssize_t
-rw_file_once(int fd, u8 *mem, size_t nrw,
- off_t off, int rw_type, size_t rc,
- int loop_eagain, int loop_eintr)
+rw_file_exact(int fd, u8 *mem, size_t nrw,
+ off_t off, int rw_type, int loop_eagain,
+ int loop_eintr, size_t max_retries)
{
- ssize_t rv;
+ ssize_t rv = 0;
+ ssize_t rc = 0;
size_t retries_on_zero = 0;
- size_t max_retries = 10;
- if (mem == NULL)
- goto err_rw_file_once;
+ while (1) {
-read_again:
- rv = prw(fd, mem + rc, nrw - rc, off + rc, rw_type,
- loop_eagain, loop_eintr);
+ rc += rv;
+ if ((size_t)rc >= nrw)
+ break;
- if (rv < 0)
- return -1;
+ rv = prw(fd,
+ mem + (size_t)rc,
+ nrw - (size_t)rc,
+ off + (off_t)rc,
+ rw_type, loop_eagain, loop_eintr);
+
+ if (rv < 0)
+ return -1;
+
+ /* Prevent theoretical overflow */
+ if ((size_t)rv > (nrw - rc))
+ goto err_rw_file_exact;
- if ((size_t)rv > (nrw - rc))/* don't overflow */
- goto err_rw_file_once;
+ if (rv == 0) {
+ if (retries_on_zero++ < max_retries)
+ continue;
+ goto err_rw_file_exact;
+ }
- if (rv != 0)
- return rv;
+ retries_on_zero = 0;
+ }
+
+ if ((size_t)rc != nrw)
+ goto err_rw_file_exact;
- if (retries_on_zero++ < max_retries)
- goto read_again;
+ return rw_over_nrw(rc, nrw);
-err_rw_file_once:
+err_rw_file_exact:
errno = EIO;
return -1;
}
@@ -1818,6 +1922,9 @@ err_rw_file_once:
* This limitation is acceptable, since nvmutil is
* single-threaded. Portability is the main goal.
*
+ * If you need real pwrite/pread, just compile
+ * with flag: HAVE_REAL_PREAD_PWRITE=1
+ *
* A fallback is provided for regular read/write.
* rw_type can be IO_READ, IO_WRITE, IO_PREAD
* or IO_PWRITE
@@ -1830,6 +1937,10 @@ err_rw_file_once:
* also mitigates a few theoretical libc bugs.
* It is designed for extremely safe single-threaded
* I/O on applications that need it.
+ *
+ * NOTE: If you use loop_eagain (1), you enable wait
+ * loop on EAGAIN. Beware if using this on a non-blocking
+ * pipe (it could spin indefinitely).
*/
static ssize_t
@@ -1837,11 +1948,16 @@ prw(int fd, void *mem, size_t nrw,
off_t off, int rw_type,
int loop_eagain, int loop_eintr)
{
- off_t off_orig;
ssize_t r;
- int saved_errno;
- int flags;
int positional_rw;
+ struct stat st;
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+ int saved_errno;
+ off_t verified;
+ off_t off_orig;
+ off_t off_last;
+#endif
if (mem == NULL)
goto err_prw;
@@ -1855,6 +1971,20 @@ prw(int fd, void *mem, size_t nrw,
r = -1;
+ /* Programs like cat can use this,
+ so we only check if it's a normal
+ file if not looping EAGAIN */
+ if (!loop_eagain) {
+ /*
+ * Checking on every run of prw()
+ * is expensive if called many
+ * times, but is defensive in
+ * case the status changes.
+ */
+ if (check_file(fd, &st) == -1)
+ return -1;
+ }
+
if (rw_type >= IO_PREAD)
positional_rw = 1; /* pread/pwrite */
else
@@ -1863,10 +1993,21 @@ prw(int fd, void *mem, size_t nrw,
try_rw_again:
if (!positional_rw) {
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+real_pread_pwrite:
+#endif
if (rw_type == IO_WRITE)
r = write(fd, mem, nrw);
else if (rw_type == IO_READ)
r = read(fd, mem, nrw);
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ else if (rw_type == IO_PWRITE)
+ r = pwrite(fd, mem, nrw, off);
+ else if (rw_type == IO_PREAD)
+ r = pread(fd, mem, nrw, off);
+#endif
if (r == -1 && (errno == try_err(loop_eintr, EINTR)
|| errno == try_err(loop_eagain, EAGAIN)))
@@ -1875,57 +2016,99 @@ try_rw_again:
return rw_over_nrw(r, nrw);
}
- flags = fcntl(fd, F_GETFL);
- if (flags == -1)
- return -1;
-
- /*
- * O_APPEND must not be used, because this
- * allows POSIX write() to ignore the
- * current write offset and write at EOF,
- * which would therefore break pread/pwrite
- */
- if (flags & O_APPEND)
- goto err_prw;
-
+#if defined(HAVE_REAL_PREAD_PWRITE) && \
+ HAVE_REAL_PREAD_PWRITE > 0
+ goto real_pread_pwrite;
+#else
if ((off_orig = lseek_loop(fd, (off_t)0, SEEK_CUR,
- loop_eagain, loop_eintr)) == (off_t)-1)
+ loop_eagain, loop_eintr)) == (off_t)-1) {
r = -1;
- else if (lseek_loop(fd, off, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1)
+ } else if (lseek_loop(fd, off, SEEK_SET,
+ loop_eagain, loop_eintr) == (off_t)-1) {
r = -1;
+ } else {
+ verified = lseek_loop(fd, (off_t)0, SEEK_CUR,
+ loop_eagain, loop_eintr);
- do {
- if (rw_type == IO_PREAD)
- r = read(fd, mem, nrw);
- else if (rw_type == IO_PWRITE)
- r = write(fd, mem, nrw);
+ /*
+ * Partial thread-safety: detect
+ * if the offset differs from what
+ * we just set. If it does,
+ * then another thread may have
+ * changed it.
+ *
+ * This is no substitute for real
+ * pread/pwrite, which would be
+ * fully atomic at kernel-level
+ * and do not use file offsets.
+ *
+ * TODO: Add a toggle to make it
+ * recover instead, reset
+ * to known offset, and
+ * carry on operations.
+ *
+ * Failure is the better option
+ * here, since recovery would
+ * mask hidden bugs in code.
+ */
+ if (off != verified)
+ goto err_prw;
- r = rw_over_nrw(r, nrw);
- } while (r == -1 &&
- (errno == try_err(loop_eintr, EINTR)
- || errno == try_err(loop_eagain, EAGAIN)));
+ do {
+ if (rw_type == IO_PREAD)
+ r = read(fd, mem, nrw);
+ else if (rw_type == IO_PWRITE)
+ r = write(fd, mem, nrw);
+
+ r = rw_over_nrw(r, nrw);
+
+ } while (r == -1 &&
+ (errno == try_err(loop_eintr, EINTR)
+ || errno == try_err(loop_eagain, EAGAIN)));
+ }
saved_errno = errno;
- if (lseek_loop(fd, off_orig, SEEK_SET,
- loop_eagain, loop_eintr) == (off_t)-1) {
+ off_last = lseek_loop(fd, off_orig, SEEK_SET,
+ loop_eagain, loop_eintr);
+ if (off_last == (off_t)-1) {
errno = saved_errno;
return -1;
}
+ if (off_last != off_orig)
+ goto err_prw;
errno = saved_errno;
return rw_over_nrw(r, nrw);
+#endif
err_prw:
errno = EIO;
return -1;
}
+static int
+check_file(int fd, struct stat *st)
+{
+ if (fstat(fd, st) == -1)
+ goto err_is_file;
+
+ if (!S_ISREG(st->st_mode))
+ goto err_is_file;
+
+ return 0;
+
+err_is_file:
+ errno = EIO;
+ return -1;
+}
+
/*
+ * Check overflows caused by buggy libc.
+ *
* POSIX can say whatever it wants.
* specification != implementation
*/
-static int
+static ssize_t
rw_over_nrw(ssize_t r, size_t nrw)
{
if (r == -1)
@@ -1963,6 +2146,13 @@ err_rw_over_nrw:
return -1;
}
+#if !defined(HAVE_REAL_PREAD_PWRITE) || \
+ HAVE_REAL_PREAD_PWRITE < 1
+/*
+ * lseek_loop() does lseek() but optionally
+ * on an EINTR/EAGAIN wait loop. Used by prw()
+ * for setting offsets for positional I/O.
+ */
static off_t
lseek_loop(int fd, off_t off, int whence,
int loop_eagain, int loop_eintr)
@@ -1977,7 +2167,14 @@ lseek_loop(int fd, off_t off, int whence,
return old;
}
+#endif
+/*
+ * If a given error loop is enabled,
+ * e.g. EINTR or EAGAIN, an I/O operation
+ * is retried while it returns -1 with errno
+ * set to one of these, e.g. -1 and EINTR
+ */
static int
try_err(int loop_err, int errval)
{
@@ -1990,17 +2187,38 @@ try_err(int loop_err, int errval)
return -1;
}
+static int
+close_files(void)
+{
+ int close_err_gbe = 0;
+ int saved_errno = errno;
+
+ if (gbe_fd > -1) {
+ if (close(gbe_fd) == -1)
+ close_err_gbe = errno;
+ gbe_fd = -1;
+ }
+
+ if (saved_errno)
+ errno = saved_errno;
+
+ if (close_err_gbe)
+ return -1;
+
+ return 0;
+}
+
static void
err(int nvm_errval, const char *msg, ...)
{
va_list args;
- if (nvm_errval >= 0) {
- close_files();
- errno = nvm_errval;
- }
- if (errno <= 0)
+ if (errno < 0)
errno = ECANCELED;
+ if (!errno)
+ errno = nvm_errval;
+
+ (void)close_files();
fprintf(stderr, "%s: ", getnvmprogname());
@@ -2014,22 +2232,6 @@ err(int nvm_errval, const char *msg, ...)
exit(EXIT_FAILURE);
}
-static void
-close_files(void)
-{
- if (gbe_fd > -1) {
- if (close(gbe_fd) == -1)
- err(-1, "%s: close failed", fname);
- gbe_fd = -1;
- }
-
- if (urandom_fd > -1) {
- if (close(urandom_fd) == -1)
- err(-1, "%s: close failed", rname);
- urandom_fd = -1;
- }
-}
-
static const char *
getnvmprogname(void)
{