summaryrefslogtreecommitdiff
path: root/config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch
diff options
context:
space:
mode:
authorLeah Rowe <leah@libreboot.org>2025-04-20 05:42:50 +0100
committerLeah Rowe <leah@libreboot.org>2025-04-20 05:43:53 +0100
commit86e7aa80c51979767208b858dac5f38d7e83914e (patch)
tree2db500d93ea92563d440a255211329ecaa8a0329 /config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch
parent8d57bf6009e3eb6ed0dfab042981a700e9f093a3 (diff)
Update the GRUB revisions
A number of regressions were caused by the recent CVE fixes, many of which have since been fixed upstream. This includes several ext4 file system bugs, which caused some systems not to boot properly, when dealing with very large initramfs files. No additional patching has been made. This will be tested, and then used to provide a revision update for Libreboot 20241206. After this, there are several additional OOT patches that will be merged, for the next *testing release* of Libreboot. Update to this revision, for all GRUB trees: a4da71dafeea519b034beb159dfe80c486c2107c This brings in the following changes from upstream: * a4da71daf util/grub-install: Include raid5rec module for RAID 4 as well * 223fcf808 loader/ia64/efi/linux: Reset grub_errno on failure to allocate * 6504a8d4b lib/datetime: Specify license in emu module * 8fef533cf configure: Add -mno-relax on riscv* * 1fe094855 docs: Document the long options of tpm2_key_protect_init * 6252eb97c INSTALL: Document the packages needed for TPM2 key protector tests * 9d4b382aa docs: Update NV index mode of TPM2 key protector * 2043b6899 tests/tpm2_key_protector_test: Add more NV index mode tests * 9f66a4719 tests/tpm2_key_protector_test: Reset "ret" on fail * b7d89e667 tests/tpm2_key_protector_test: Simplify the NV index mode test * 5934bf51c util/grub-protect: Support NV index mode * cd9cb944d tpm2_key_protector: Support NV index handles * fa69deac5 tpm2_key_protector: Unseal key from a buffer * 75c480885 tss2: Add TPM 2.0 NV index commands * 041164d00 tss2: Fix the missing authCommand * 46c9f3a8d tpm2_key_protector: Add tpm2_dump_pcr command * 617dab9e4 tpm2_key_protector: Dump PCRs on policy fail * 204a6ddfb loader/i386/linux: Update linux_kernel_params to match upstream * 6b64f297e loader/xnu: Fix memory leak * f94d257e8 fs/btrfs: Fix memory leaks * 81146fb62 loader/i386/linux: Fix resource leak * 1d0059447 lib/reloacator: Fix memory leaks * f3f1fcecd disk/ldm: Fix memory leaks * aae2ea619 fs/ntfs: Fix NULL pointer dereference and possible infinite loop * 3b25e494d net/drivers/ieee1275/ofnet: Add missing grub_malloc() * fee6081ec kern/ieee1275/init: Increase MIN_RMA size for CAS negotiation on PowerPC machines * b66c6f918 fs/zfs: Fix a number of memory leaks in ZFS code * 1d59f39b5 tests/util/grub-shell: Remove the work directory on successful run and debug is not on * e0116f3bd tests/grub_cmd_cryptomount: Remove temporary directories if successful and debug is not on * e6e2b73db tests/grub_cmd_cryptomount: Default TMPDIR to /tmp * 32b02bb92 tests/grub_cmd_cryptomount: Cleanup the cryptsetup script unless debug is enabled * c188ca5d5 tests: Cleanup generated files on expected failure in grub_cmd_cryptomount * 50320c093 tests/util/grub-shell-luks-tester: Add missing line to create RET variable in cleanup * bb6d3199b tests/util/grub-shell-luks-tester: Find cryptodisk by UUID * 3fd163e45 tests/util/grub-shell: Default qemuopts to envvar $GRUB_QEMU_OPTS * ff7f55307 disk/lvm: Add informational messages in error cases of ignored features * a16b4304a disk/lvm: Add support for cachevol LV * 9a37d6114 disk/lvm: Add support for integrity LV * 6c14b87d6 lvm: Match all LVM segments before validation * d34b9120e disk/lvm: Remove unused cache_pool * 90848a1f7 disk/lvm: Make cache_lv more generic as ignored_feature_lv * 488ac8bda commands/ls: Add directory header for dir args * 096bf59e4 commands/ls: Print full paths for file args * 90288fc48 commands/ls: Output path for single file arguments given with path * 6337d84af commands/ls: Show modification time for file paths * cbfb031b1 commands/ls: Merge print_files_long() and print_files() into print_file() * 112d2069c commands/ls: Return proper GRUB_ERR_* for functions returning type grub_err_t * da9740cd5 commands/acpi: Use options enum to index command options * 1acf11fe4 docs: Capture additional commands restricted by lockdown * 6a168afd3 docs: Document restricted filesystems in lockdown * be0ae9583 loader/i386/bsd: Fix type passed for the kernel * ee27f07a6 kern/partition: Unbreak support for nested partitions * cb639acea lib/tss2/tss2_structs.h: Fix clang build - remove duplicate typedef * 696e35b7f include/grub/mm.h: Remove duplicate inclusion of grub/err.h * 187338f1a script/execute: Don't let trailing blank lines determine the return code * ff173a1c0 gitignore: Ignore generated files from libtasn * fbcc38891 util/grub.d/30_os-prober.in: Conditionally show or hide chain and efi menu entries * 56ccc5ed5 util/grub.d/30_os-prober.in: Fix GRUB_OS_PROBER_SKIP_LIST for non-EFI * 01f064064 docs: Do not reference non-existent --dumb option * 3f440b5a5 docs: Replace @lbracechar{} and @rbracechar{} with @{ and @} * f20988738 fs/xfs: Fix grub_xfs_iterate_dir() return value in case of failure * 1ed2628b5 fs/xfs: Add new superblock features added in Linux 6.12/6.13 * 348cd416a fs/ext2: Rework out-of-bounds read for inline and external extents * c730eddd2 disk/ahci: Remove conditional operator for endtime * f0a08324d term/ns8250-spcr: Return if redirection is disabled * 7161e2437 commands/file: Fix NULL dereference in the knetbsd tests * 11b9c2dd0 gdb_helper: Typo hueristic * 224aefd05 kern/efi/mm: Reset grub_mm_add_region_fn after ExitBootServices() call * 531750f7b i386/tsc: The GRUB menu gets stuck due to unserialized rdtsc * f2a1f66e7 kern/i386/tsc_pmtimer: The GRUB menu gets stuck due to failed calibration * 13f005ed8 loader/i386/linux: Fix cleanup if kernel doesn't support 64-bit addressing Signed-off-by: Leah Rowe <leah@libreboot.org>
Diffstat (limited to 'config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch')
-rw-r--r--config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch1074
1 files changed, 1074 insertions, 0 deletions
diff --git a/config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch b/config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch
new file mode 100644
index 00000000..96ecbaaf
--- /dev/null
+++ b/config/grub/xhci/patches/0025-Add-native-NVMe-driver-based-on-SeaBIOS.patch
@@ -0,0 +1,1074 @@
+From 7bb8bb98b86fc97e1ce04e6169d517acbd476e1e Mon Sep 17 00:00:00 2001
+From: Mate Kukri <km@mkukri.xyz>
+Date: Mon, 20 May 2024 11:43:35 +0100
+Subject: [PATCH 25/25] Add native NVMe driver based on SeaBIOS
+
+Tested to successfully boot Debian on QEMU and OptiPlex 3050.
+
+Signed-off-by: Mate Kukri <km@mkukri.xyz>
+---
+ Makefile.am | 2 +-
+ grub-core/Makefile.core.def | 6 +
+ grub-core/commands/nativedisk.c | 1 +
+ grub-core/disk/nvme-int.h | 208 +++++++++
+ grub-core/disk/nvme.c | 781 ++++++++++++++++++++++++++++++++
+ include/grub/disk.h | 1 +
+ 6 files changed, 998 insertions(+), 1 deletion(-)
+ create mode 100644 grub-core/disk/nvme-int.h
+ create mode 100644 grub-core/disk/nvme.c
+
+diff --git a/Makefile.am b/Makefile.am
+index 65016f856..7bc0866ba 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -434,7 +434,7 @@ if COND_i386_coreboot
+ FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst)
+ default_payload.elf: grub-mkstandalone grub-mkimage FORCE
+ test -f $@ && rm $@ || true
+- pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
++ pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
+ endif
+
+ endif
+diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def
+index fda723f0c..367e4b5e6 100644
+--- a/grub-core/Makefile.core.def
++++ b/grub-core/Makefile.core.def
+@@ -2684,3 +2684,9 @@ module = {
+ cflags = '-Wno-uninitialized';
+ cppflags = '-I$(srcdir)/lib/libtasn1-grub -I$(srcdir)/tests/asn1/';
+ };
++
++module = {
++ name = nvme;
++ common = disk/nvme.c;
++ enable = pci;
++};
+diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c
+index 6806bff9c..fd68a513e 100644
+--- a/grub-core/commands/nativedisk.c
++++ b/grub-core/commands/nativedisk.c
+@@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative)
+ case GRUB_DISK_DEVICE_ATA_ID:
+ case GRUB_DISK_DEVICE_SCSI_ID:
+ case GRUB_DISK_DEVICE_XEN:
++ case GRUB_DISK_DEVICE_NVME_ID:
+ if (getnative)
+ break;
+ /* FALLTHROUGH */
+diff --git a/grub-core/disk/nvme-int.h b/grub-core/disk/nvme-int.h
+new file mode 100644
+index 000000000..1295b58aa
+--- /dev/null
++++ b/grub-core/disk/nvme-int.h
+@@ -0,0 +1,208 @@
++// NVMe datastructures and constants
++//
++// Copyright 2017 Amazon.com, Inc. or its affiliates.
++//
++// This file may be distributed under the terms of the GNU LGPLv3 license.
++
++#ifndef __NVME_INT_H
++#define __NVME_INT_H
++
++#include <grub/types.h>
++
++/* Data structures */
++
++/* The register file of a NVMe host controller. This struct follows the naming
++ scheme in the NVMe specification. */
++struct nvme_reg {
++ grub_uint64_t cap; /* controller capabilities */
++ grub_uint32_t vs; /* version */
++ grub_uint32_t intms; /* interrupt mask set */
++ grub_uint32_t intmc; /* interrupt mask clear */
++ grub_uint32_t cc; /* controller configuration */
++ grub_uint32_t _res0;
++ grub_uint32_t csts; /* controller status */
++ grub_uint32_t _res1;
++ grub_uint32_t aqa; /* admin queue attributes */
++ grub_uint64_t asq; /* admin submission queue base address */
++ grub_uint64_t acq; /* admin completion queue base address */
++};
++
++/* Submission queue entry */
++struct nvme_sqe {
++ union {
++ grub_uint32_t dword[16];
++ struct {
++ grub_uint32_t cdw0; /* Command DWORD 0 */
++ grub_uint32_t nsid; /* Namespace ID */
++ grub_uint64_t _res0;
++ grub_uint64_t mptr; /* metadata ptr */
++
++ grub_uint64_t dptr_prp1;
++ grub_uint64_t dptr_prp2;
++ };
++ };
++};
++
++/* Completion queue entry */
++struct nvme_cqe {
++ union {
++ grub_uint32_t dword[4];
++ struct {
++ grub_uint32_t cdw0;
++ grub_uint32_t _res0;
++ grub_uint16_t sq_head;
++ grub_uint16_t sq_id;
++ grub_uint16_t cid;
++ grub_uint16_t status;
++ };
++ };
++};
++
++/* The common part of every submission or completion queue. */
++struct nvme_queue {
++ grub_uint32_t *dbl; /* doorbell */
++ grub_uint16_t mask; /* length - 1 */
++};
++
++struct nvme_cq {
++ struct nvme_queue common;
++ struct nvme_cqe *cqe;
++
++ /* We have read upto (but not including) this entry in the queue. */
++ grub_uint16_t head;
++
++ /* The current phase bit the controller uses to indicate that it has written
++ a new entry. This is inverted after each wrap. */
++ unsigned phase : 1;
++};
++
++struct nvme_sq {
++ struct nvme_queue common;
++ struct nvme_sqe *sqe;
++
++ /* Corresponding completion queue. We only support a single SQ per CQ. */
++ struct nvme_cq *cq;
++
++ /* The last entry the controller has fetched. */
++ grub_uint16_t head;
++
++ /* The last value we have written to the tail doorbell. */
++ grub_uint16_t tail;
++};
++
++struct nvme_ctrl {
++ grub_pci_device_t pci;
++ struct nvme_reg volatile *reg;
++
++ grub_uint32_t ctrlnum;
++
++ grub_uint32_t doorbell_stride; /* in bytes */
++
++ struct nvme_sq admin_sq;
++ struct nvme_cq admin_cq;
++
++ grub_uint32_t ns_count;
++
++ struct nvme_sq io_sq;
++ struct nvme_cq io_cq;
++};
++
++struct nvme_namespace {
++ struct nvme_namespace *next;
++ struct nvme_namespace **prev;
++
++ char *devname;
++
++ grub_uint32_t nsnum;
++
++ struct nvme_ctrl *ctrl;
++
++ grub_uint32_t ns_id;
++
++ grub_uint64_t lba_count; /* The total amount of sectors. */
++
++ grub_uint32_t block_size;
++ grub_uint32_t metadata_size;
++ grub_uint32_t max_req_size;
++};
++
++/* Data structures for NVMe admin identify commands */
++
++struct nvme_identify_ctrl {
++ grub_uint16_t vid;
++ grub_uint16_t ssvid;
++ char sn[20];
++ char mn[40];
++ char fr[8];
++
++ grub_uint8_t rab;
++ grub_uint8_t ieee[3];
++ grub_uint8_t cmic;
++ grub_uint8_t mdts;
++
++ char _boring[516 - 78];
++
++ grub_uint32_t nn; /* number of namespaces */
++};
++
++struct nvme_identify_ns_list {
++ grub_uint32_t ns_id[1024];
++};
++
++struct nvme_lba_format {
++ grub_uint16_t ms;
++ grub_uint8_t lbads;
++ grub_uint8_t rp;
++};
++
++struct nvme_identify_ns {
++ grub_uint64_t nsze;
++ grub_uint64_t ncap;
++ grub_uint64_t nuse;
++ grub_uint8_t nsfeat;
++ grub_uint8_t nlbaf;
++ grub_uint8_t flbas;
++
++ char _boring[128 - 27];
++
++ struct nvme_lba_format lbaf[16];
++};
++
++union nvme_identify {
++ struct nvme_identify_ns ns;
++ struct nvme_identify_ctrl ctrl;
++ struct nvme_identify_ns_list ns_list;
++};
++
++/* NVMe constants */
++
++#define NVME_CAP_CSS_NVME (1ULL << 37)
++
++#define NVME_CSTS_FATAL (1U << 1)
++#define NVME_CSTS_RDY (1U << 0)
++
++#define NVME_CC_EN (1U << 0)
++
++#define NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U
++#define NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U
++#define NVME_SQE_OPC_ADMIN_IDENTIFY 6U
++
++#define NVME_SQE_OPC_IO_WRITE 1U
++#define NVME_SQE_OPC_IO_READ 2U
++
++#define NVME_ADMIN_IDENTIFY_CNS_ID_NS 0U
++#define NVME_ADMIN_IDENTIFY_CNS_ID_CTRL 1U
++#define NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U
++
++#define NVME_CQE_DW3_P (1U << 16)
++
++#define NVME_PAGE_SIZE 4096
++#define NVME_PAGE_MASK ~(NVME_PAGE_SIZE - 1)
++
++/* Length for the queue entries. */
++#define NVME_SQE_SIZE_LOG 6
++#define NVME_CQE_SIZE_LOG 4
++
++#endif
++
++/* EOF */
+diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c
+new file mode 100644
+index 000000000..093237c70
+--- /dev/null
++++ b/grub-core/disk/nvme.c
+@@ -0,0 +1,781 @@
++// Low level NVMe disk access
++//
++// Based on SeaBIOS NVMe driver - Copyright 2017 Amazon.com, Inc. or its affiliates.
++// Port to GRUB2 done by Mate Kukri
++//
++// This file may be distributed under the terms of the GNU LGPLv3 license.
++
++#include <grub/disk.h>
++#include <grub/dl.h>
++#include <grub/pci.h>
++#include "nvme-int.h"
++
++GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 in reality but it is GPLv3 compatible */
++
++static grub_uint32_t grub_nvme_ctrlcnt;
++static grub_uint32_t grub_nvme_nscnt;
++
++static struct nvme_namespace *grub_nvme_namespaces;
++
++// Page aligned "dma bounce buffer" of size NVME_PAGE_SIZE
++static void *nvme_dma_buffer;
++
++static void *
++zalloc_page_aligned(grub_uint32_t size)
++{
++ void *res = grub_memalign(NVME_PAGE_SIZE, size);
++ if (res) grub_memset(res, 0, size);
++ return res;
++}
++
++static void
++nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, grub_uint16_t q_idx,
++ grub_uint16_t length)
++{
++ grub_memset(q, 0, sizeof(*q));
++ q->dbl = (grub_uint32_t *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
++ grub_dprintf("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
++ q->mask = length - 1;
++}
++
++static int
++nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, grub_uint16_t length,
++ struct nvme_cq *cq)
++{
++ nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
++ sq->sqe = zalloc_page_aligned(sizeof(*sq->sqe) * length);
++
++ if (!sq->sqe) {
++ return -1;
++ }
++
++ grub_dprintf("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
++ sq->cq = cq;
++ sq->head = 0;
++ sq->tail = 0;
++
++ return 0;
++}
++
++static int
++nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx, grub_uint16_t length)
++{
++ nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
++ cq->cqe = zalloc_page_aligned(sizeof(*cq->cqe) * length);
++ if (!cq->cqe) {
++ return -1;
++ }
++
++ cq->head = 0;
++
++ /* All CQE phase bits are initialized to zero. This means initially we wait
++ for the host controller to set these to 1. */
++ cq->phase = 1;
++
++ return 0;
++}
++
++static int
++nvme_poll_cq(struct nvme_cq *cq)
++{
++ grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3];
++ return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
++}
++
++static int
++nvme_is_cqe_success(struct nvme_cqe const *cqe)
++{
++ return ((cqe->status >> 1) & 0xFF) == 0;
++}
++
++static struct nvme_cqe
++nvme_error_cqe(void)
++{
++ struct nvme_cqe r;
++
++ /* 0xFF is a vendor specific status code != success. Should be okay for
++ indicating failure. */
++ grub_memset(&r, 0xFF, sizeof(r));
++ return r;
++}
++
++static struct nvme_cqe
++nvme_consume_cqe(struct nvme_sq *sq)
++{
++ struct nvme_cq *cq = sq->cq;
++
++ if (!nvme_poll_cq(cq)) {
++ /* Cannot consume a completion queue entry, if there is none ready. */
++ return nvme_error_cqe();
++ }
++
++ struct nvme_cqe *cqe = &cq->cqe[cq->head];
++ grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask;
++ grub_dprintf("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
++ if (cq_next_head < cq->head) {
++ grub_dprintf("nvme", "cq %p wrap\n", cq);
++ cq->phase = ~cq->phase;
++ }
++ cq->head = cq_next_head;
++
++ /* Update the submission queue head. */
++ if (cqe->sq_head != sq->head) {
++ sq->head = cqe->sq_head;
++ grub_dprintf("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head);
++ }
++
++ /* Tell the controller that we consumed the completion. */
++ *(volatile grub_uint32_t *) cq->common.dbl = cq->head;
++
++ return *cqe;
++}
++
++static struct nvme_cqe
++nvme_wait(struct nvme_sq *sq)
++{
++ // static const unsigned nvme_timeout = 5000 /* ms */;
++ // grub_uint32_t to = timer_calc(nvme_timeout);
++ while (!nvme_poll_cq(sq->cq)) {
++ /* FIXME
++ yield();
++
++ if (timer_check(to)) {
++ warn_timeout();
++ return nvme_error_cqe();
++ }*/
++ }
++
++ return nvme_consume_cqe(sq);
++}
++
++/* Returns the next submission queue entry (or NULL if the queue is full). It
++ also fills out Command Dword 0 and clears the rest. */
++static struct nvme_sqe *
++nvme_get_next_sqe(struct nvme_sq *sq, grub_uint8_t opc, void *metadata, void *data, void *data2)
++{
++ if (((sq->head + 1) & sq->common.mask) == sq->tail) {
++ grub_dprintf("nvme", "submission queue is full\n");
++ return NULL;
++ }
++
++ struct nvme_sqe *sqe = &sq->sqe[sq->tail];
++ grub_dprintf("nvme", "sq %p next_sqe %u\n", sq, sq->tail);
++
++ grub_memset(sqe, 0, sizeof(*sqe));
++ sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
++ sqe->mptr = (grub_uint32_t)metadata;
++ sqe->dptr_prp1 = (grub_uint32_t)data;
++ sqe->dptr_prp2 = (grub_uint32_t)data2;
++
++ return sqe;
++}
++
++/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
++static void
++nvme_commit_sqe(struct nvme_sq *sq)
++{
++ grub_dprintf("nvme", "sq %p commit_sqe %u\n", sq, sq->tail);
++ sq->tail = (sq->tail + 1) & sq->common.mask;
++ *(volatile grub_uint32_t *) sq->common.dbl = sq->tail;
++}
++
++/* Perform an identify command on the admin queue and return the resulting
++ buffer. This may be a NULL pointer, if something failed. This function
++ cannot be used after initialization, because it uses buffers in tmp zone. */
++static union nvme_identify *
++nvme_admin_identify(struct nvme_ctrl *ctrl, grub_uint8_t cns, grub_uint32_t nsid)
++{
++ union nvme_identify *identify_buf = zalloc_page_aligned(4096);
++ if (!identify_buf)
++ return NULL;
++
++ struct nvme_sqe *cmd_identify;
++ cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
++ identify_buf, NULL);
++ if (!cmd_identify)
++ goto error;
++
++ cmd_identify->nsid = nsid;
++ cmd_identify->dword[10] = cns;
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ goto error;
++ }
++
++ return identify_buf;
++ error:
++ grub_free(identify_buf);
++ return NULL;
++}
++
++static struct nvme_identify_ctrl *
++nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
++{
++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
++}
++
++static struct nvme_identify_ns *
++nvme_admin_identify_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_id)
++{
++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
++ ns_id)->ns;
++}
++
++static void
++nvme_probe_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_idx, grub_uint8_t mdts)
++{
++ grub_uint32_t ns_id = ns_idx + 1;
++
++ struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
++ if (!id) {
++ grub_dprintf("nvme", "NVMe couldn't identify namespace %u.\n", ns_id);
++ goto free_buffer;
++ }
++
++ grub_uint8_t current_lba_format = id->flbas & 0xF;
++ if (current_lba_format > id->nlbaf) {
++ grub_dprintf("nvme", "NVMe NS %u: current LBA format %u is beyond what the "
++ " namespace supports (%u)?\n",
++ ns_id, current_lba_format, id->nlbaf + 1);
++ goto free_buffer;
++ }
++
++ if (!id->nsze) {
++ grub_dprintf("nvme", "NVMe NS %u is inactive.\n", ns_id);
++ goto free_buffer;
++ }
++
++ if (!nvme_dma_buffer) {
++ nvme_dma_buffer = zalloc_page_aligned(NVME_PAGE_SIZE);
++ if (!nvme_dma_buffer) {
++ goto free_buffer;
++ }
++ }
++
++ struct nvme_namespace *ns = grub_malloc(sizeof(*ns));
++ if (!ns) {
++ goto free_buffer;
++ }
++ grub_memset(ns, 0, sizeof(*ns));
++ ns->ctrl = ctrl;
++ ns->ns_id = ns_id;
++ ns->lba_count = id->nsze;
++
++ struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];
++
++ ns->block_size = 1U << fmt->lbads;
++ ns->metadata_size = fmt->ms;
++
++ if (ns->block_size > NVME_PAGE_SIZE) {
++ /* If we see devices that trigger this path, we need to increase our
++ buffer size. */
++ grub_free(ns);
++ goto free_buffer;
++ }
++
++ if (mdts) {
++ ns->max_req_size = ((1U << mdts) * NVME_PAGE_SIZE) / ns->block_size;
++ grub_dprintf("nvme", "NVME NS %u max request size: %d sectors\n",
++ ns_id, ns->max_req_size);
++ } else {
++ ns->max_req_size = -1U;
++ }
++
++ ns->devname = grub_xasprintf("nvme%un%u", ctrl->ctrlnum, ns_id);
++ ns->nsnum = grub_nvme_nscnt++;
++
++ grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns));
++
++free_buffer:
++ grub_free(id);
++}
++
++
++/* Release memory allocated for a completion queue */
++static void
++nvme_destroy_cq(struct nvme_cq *cq)
++{
++ grub_free(cq->cqe);
++ cq->cqe = NULL;
++}
++
++/* Release memory allocated for a submission queue */
++static void
++nvme_destroy_sq(struct nvme_sq *sq)
++{
++ grub_free(sq->sqe);
++ sq->sqe = NULL;
++}
++
++/* Returns 0 on success. */
++static int
++nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx)
++{
++ int rc;
++ struct nvme_sqe *cmd_create_cq;
++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);
++
++ rc = nvme_init_cq(ctrl, cq, q_idx, length);
++ if (rc) {
++ goto err;
++ }
++
++ cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
++ cq->cqe, NULL);
++ if (!cmd_create_cq) {
++ goto err_destroy_cq;
++ }
++
++ cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
++ cmd_create_cq->dword[11] = 1 /* physically contiguous */;
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "create io cq failed: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++
++ goto err_destroy_cq;
++ }
++
++ return 0;
++
++err_destroy_cq:
++ nvme_destroy_cq(cq);
++err:
++ return -1;
++}
++
++/* Returns 0 on success. */
++static int
++nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, struct nvme_cq *cq)
++{
++ int rc;
++ struct nvme_sqe *cmd_create_sq;
++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);
++
++ rc = nvme_init_sq(ctrl, sq, q_idx, length, cq);
++ if (rc) {
++ goto err;
++ }
++
++ cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
++ sq->sqe, NULL);
++ if (!cmd_create_sq) {
++ goto err_destroy_sq;
++ }
++
++ cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
++ cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */;
++ grub_dprintf("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq,
++ cmd_create_sq->dword[10], cmd_create_sq->dword[11]);
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "create io sq failed: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++ goto err_destroy_sq;
++ }
++
++ return 0;
++
++err_destroy_sq:
++ nvme_destroy_sq(sq);
++err:
++ return -1;
++}
++
++/* Reads count sectors into buf. The buffer cannot cross page boundaries. */
++static int
++nvme_io_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *prp1, void *prp2,
++ grub_uint16_t count, int write)
++{
++ if (((grub_uint32_t)prp1 & 0x3) || ((grub_uint32_t)prp2 & 0x3)) {
++ /* Buffer is misaligned */
++ return -1;
++ }
++
++ struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq,
++ write ? NVME_SQE_OPC_IO_WRITE
++ : NVME_SQE_OPC_IO_READ,
++ NULL, prp1, prp2);
++ io_read->nsid = ns->ns_id;
++ io_read->dword[10] = (grub_uint32_t)lba;
++ io_read->dword[11] = (grub_uint32_t)(lba >> 32);
++ io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1);
++
++ nvme_commit_sqe(&ns->ctrl->io_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "read io: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++
++ return -1;
++ }
++
++ grub_dprintf("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id, write ? "write" : "read",
++ lba, count);
++ return count;
++}
++
++// Transfer up to one page of data using the internal dma bounce buffer
++static int
++nvme_bounce_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count,
++ int write)
++{
++ grub_uint16_t const max_blocks = NVME_PAGE_SIZE / ns->block_size;
++ grub_uint16_t blocks = count < max_blocks ? count : max_blocks;
++
++ if (write)
++ grub_memcpy(nvme_dma_buffer, buf, blocks * ns->block_size);
++
++ int res = nvme_io_xfer(ns, lba, nvme_dma_buffer, NULL, blocks, write);
++
++ if (!write && res >= 0)
++ grub_memcpy(buf, nvme_dma_buffer, res * ns->block_size);
++
++ return res;
++}
++
++#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */
++
++// Transfer data using page list (if applicable)
++static int
++nvme_prpl_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count,
++ int write)
++{
++ grub_uint32_t base = (long)buf;
++ grub_int32_t size;
++
++ if (count > ns->max_req_size)
++ count = ns->max_req_size;
++
++ size = count * ns->block_size;
++ /* Special case for transfers that fit into PRP1, but are unaligned */
++ if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE))
++ goto single;
++
++ /* Every request has to be page aligned */
++ if (base & ~NVME_PAGE_MASK)
++ goto bounce;
++
++ /* Make sure a full block fits into the last chunk */
++ if (size & (ns->block_size - 1ULL))
++ goto bounce;
++
++ /* Build PRP list if we need to describe more than 2 pages */
++ if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) {
++ grub_uint32_t prpl_len = 0;
++ grub_uint64_t *prpl = nvme_dma_buffer;
++ int first_page = 1;
++ for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) {
++ if (first_page) {
++ /* First page is special */
++ first_page = 0;
++ continue;
++ }
++ if (prpl_len >= NVME_MAX_PRPL_ENTRIES)
++ goto bounce;
++ prpl[prpl_len++] = base;
++ }
++ return nvme_io_xfer(ns, lba, buf, prpl, count, write);
++ }
++
++ /* Directly embed the 2nd page if we only need 2 pages */
++ if ((ns->block_size * count) > NVME_PAGE_SIZE)
++ return nvme_io_xfer(ns, lba, buf, (char *) buf + NVME_PAGE_SIZE, count, write);
++
++single:
++ /* One page is enough, don't expose anything else */
++ return nvme_io_xfer(ns, lba, buf, NULL, count, write);
++
++bounce:
++ /* Use bounce buffer to make transfer */
++ return nvme_bounce_xfer(ns, lba, buf, count, write);
++}
++
++static int
++nvme_create_io_queues(struct nvme_ctrl *ctrl)
++{
++ if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
++ goto err;
++
++ if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
++ goto err_free_cq;
++
++ return 0;
++
++ err_free_cq:
++ nvme_destroy_cq(&ctrl->io_cq);
++ err:
++ return -1;
++}
++
++/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
++static int
++nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
++{
++ // grub_uint32_t const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
++ // grub_uint32_t to = timer_calc(max_to);
++ grub_uint32_t csts;
++
++ while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
++ // FIXME
++ //yield();
++
++ if (csts & NVME_CSTS_FATAL) {
++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n");
++ return -1;
++ }
++
++ /*
++ if (timer_check(to)) {
++ warn_timeout();
++ return -1;
++ }*/
++ }
++
++ return 0;
++}
++
++/* Returns 0 on success. */
++static int grub_nvme_controller_enable(struct nvme_ctrl *ctrl)
++{
++ grub_pci_address_t addr;
++ int rc;
++
++ addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND);
++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_BUS_MASTER);
++
++ /* Turn the controller off. */
++ ctrl->reg->cc = 0;
++ if (nvme_wait_csts_rdy(ctrl, 0)) {
++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n");
++ return -1;
++ }
++
++ ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);
++
++ rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
++ NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
++ if (rc) {
++ return -1;
++ }
++
++ rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
++ NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
++ if (rc) {
++ goto err_destroy_admin_cq;
++ }
++
++ ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
++ | ctrl->admin_sq.common.mask;
++
++ ctrl->reg->asq = (grub_uint32_t)ctrl->admin_sq.sqe;
++ ctrl->reg->acq = (grub_uint32_t)ctrl->admin_cq.cqe;
++
++ grub_dprintf("nvme", " admin submission queue: %p\n", ctrl->admin_sq.sqe);
++ grub_dprintf("nvme", " admin completion queue: %p\n", ctrl->admin_cq.cqe);
++
++ ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20)
++ | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);
++
++ if (nvme_wait_csts_rdy(ctrl, 1)) {
++ grub_dprintf("nvme", "NVMe fatal error while enabling controller\n");
++ goto err_destroy_admin_sq;
++ }
++
++ /* The admin queue is set up and the controller is ready. Let's figure out
++ what namespaces we have. */
++
++ struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);
++
++ if (!identify) {
++ grub_dprintf("nvme", "NVMe couldn't identify controller.\n");
++ goto err_destroy_admin_sq;
++ }
++
++ grub_dprintf("nvme", "NVMe has %u namespace%s.\n",
++ identify->nn, (identify->nn == 1) ? "" : "s");
++
++ ctrl->ns_count = identify->nn;
++ grub_uint8_t mdts = identify->mdts;
++ grub_free(identify);
++
++ if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
++ /* No point to continue, if the controller says it doesn't have
++ namespaces or we couldn't create I/O queues. */
++ goto err_destroy_admin_sq;
++ }
++
++ /* Give the controller a global number */
++ ctrl->ctrlnum = grub_nvme_ctrlcnt++;
++
++ /* Populate namespace IDs */
++ for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
++ nvme_probe_ns(ctrl, ns_idx, mdts);
++ }
++
++ grub_dprintf("nvme", "NVMe initialization complete!\n");
++ return 0;
++
++ err_destroy_admin_sq:
++ nvme_destroy_sq(&ctrl->admin_sq);
++ err_destroy_admin_cq:
++ nvme_destroy_cq(&ctrl->admin_cq);
++ return -1;
++}
++
++static int grub_nvme_pci_probe(grub_pci_device_t dev, grub_pci_id_t pciid __attribute__ ((unused)), void *data __attribute__ ((unused)))
++{
++ grub_pci_address_t addr;
++ grub_uint32_t class, bar, version;
++ struct nvme_reg volatile *reg;
++
++ class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS));
++ if (class >> 16 != 0x0108)
++ return 0;
++ if ((class >> 8 & 0xff) != 2) { /* as of NVM 1.0e */
++ grub_dprintf("nvme", "Found incompatble NVMe: prog-if=%02x\n", class >> 8 & 0xff);
++ return 0;
++ }
++
++ bar = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0));
++ reg = grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK, sizeof (*reg));
++
++ addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND);
++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_MEM_ENABLED);
++
++ version = reg->vs;
++ grub_dprintf("nvme", "Found NVMe controller with version %u.%u.%u.\n", version >> 16, (version >> 8) & 0xFF, version & 0xFF);
++ grub_dprintf("nvme", " Capabilities %016llx\n", reg->cap);
++
++ if (~reg->cap & NVME_CAP_CSS_NVME) {
++ grub_dprintf("nvme", "Controller doesn't speak NVMe command set. Skipping.\n");
++ goto err;
++ }
++
++ struct nvme_ctrl *ctrl = grub_malloc(sizeof(*ctrl));
++ if (!ctrl)
++ goto err;
++
++ grub_memset(ctrl, 0, sizeof(*ctrl));
++
++ ctrl->reg = reg;
++ ctrl->pci = dev;
++
++ if (grub_nvme_controller_enable(ctrl))
++ goto err_free_ctrl;
++
++ return 0;
++
++ err_free_ctrl:
++ grub_free(ctrl);
++ err:
++ grub_dprintf("nvme", "Failed to enable NVMe controller.\n");
++ return 0;
++}
++
++static int
++grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data, grub_disk_pull_t pull)
++{
++ struct nvme_namespace *ns;
++
++ if (pull != GRUB_DISK_PULL_NONE)
++ return 0;
++
++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces)
++ if (hook (ns->devname, hook_data))
++ return 1;
++
++ return 0;
++}
++
++static grub_err_t
++grub_nvme_open (const char *name __attribute ((unused)), grub_disk_t disk __attribute ((unused)))
++{
++ struct nvme_namespace *ns;
++
++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces)
++ if (grub_strcmp (ns->devname, name) == 0)
++ break;
++
++ if (! ns)
++ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
++
++ disk->total_sectors = ns->lba_count;
++ disk->max_agglomerate = ns->max_req_size;
++
++ disk->id = ns->nsnum; /* global id of the namespace */
++
++ disk->data = ns;
++
++ return 0;
++}
++
++static grub_err_t
++nvme_readwrite(struct nvme_namespace *ns, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf, int write)
++{
++ for (int i = 0; i < num_sectors;) {
++ grub_uint16_t blocks_remaining = num_sectors - i;
++ char *op_buf = buf + i * ns->block_size;
++ int blocks = nvme_prpl_xfer(ns, sector + i, op_buf, blocks_remaining, write);
++ if (blocks < 0)
++ return GRUB_ERR_IO;
++ i += blocks;
++ }
++ return GRUB_ERR_NONE;
++}
++
++static grub_err_t
++grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf)
++{
++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 0);
++}
++
++static grub_err_t
++grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, const char *buf)
++{
++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 1);
++}
++
++static struct grub_disk_dev grub_nvme_dev =
++ {
++ .name = "nvme",
++ .id = GRUB_DISK_DEVICE_NVME_ID,
++ .disk_iterate = grub_nvme_iterate,
++ .disk_open = grub_nvme_open,
++ .disk_read = grub_nvme_read,
++ .disk_write = grub_nvme_write,
++ .next = 0
++ };
++
++GRUB_MOD_INIT(nvme)
++{
++ grub_stop_disk_firmware ();
++ grub_pci_iterate (grub_nvme_pci_probe, NULL);
++ grub_disk_dev_register (&grub_nvme_dev);
++}
++
++GRUB_MOD_FINI(nvme)
++{
++ grub_disk_dev_unregister (&grub_nvme_dev);
++}
+diff --git a/include/grub/disk.h b/include/grub/disk.h
+index fbf23df7f..186e76f0b 100644
+--- a/include/grub/disk.h
++++ b/include/grub/disk.h
+@@ -52,6 +52,7 @@ enum grub_disk_dev_id
+ GRUB_DISK_DEVICE_UBOOTDISK_ID,
+ GRUB_DISK_DEVICE_XEN,
+ GRUB_DISK_DEVICE_OBDISK_ID,
++ GRUB_DISK_DEVICE_NVME_ID
+ };
+
+ struct grub_disk;
+--
+2.39.5
+