summaryrefslogtreecommitdiff
path: root/config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch
diff options
context:
space:
mode:
authorLeah Rowe <leah@libreboot.org>2024-08-11 16:11:31 +0100
committerLeah Rowe <leah@libreboot.org>2024-08-11 16:13:31 +0100
commit5b353a229066d1d440c8e9df9121468d53a08964 (patch)
tree84fb77793fb9d196273b645a6eb74f1be1b45ba9 /config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch
parent80c3f9395dd9b49e6a82a79b42e109f0f0e5f890 (diff)
grub/*: Bump to rev b53ec06a1 (2024-06-17)
Of note: upstream has made several improvements to memory management, and several fixes to file systems. User-friendly change to LUKS: if the passphrase input failed, the user is prompted again for the correct passphrase, instead of GRUB just failing. Similar to cryptsetup luksOpen behaviour under Linux. This pulls in the following changes from upstream (gnu.org): * b53ec06a1 util/grub-mkrescue: Check existence of option arguments * ab9fe8030 loader/efi/fdt: Add fdtdump command to access device tree * 0cfec355d osdep/devmapper/getroot: Unmark 2 strings for translation * f171122f0 loader/emu/linux: Fix determination of program name * 828717833 disk/cryptodisk: Fix translatable message * 9a2134a70 tests: Add test for ZFS zstd * f96df6fe9 fs/zfs/zfs: Add support for zstd compression * 55d35d628 kern/efi/mm: Detect calls to grub_efi_drop_alloc() with wrong page counts * 61f1d0a61 kern/efi/mm: Change grub_efi_allocate_pages_real() to call semantically correct free function * dc0a3a27d kern/efi/mm: Change grub_efi_mm_add_regions() to keep track of map allocation size * b990df0be tests/util/grub-fs-tester: Fix EROFS label tests in grub-fs-tester * d41c64811 tests: Switch to requiring exfatprogs from exfat-utils * c1ee4da6a tests/util/grub-shell-luks-tester: Fix detached header test getting wrong header path * c22e052fe tests/util/grub-shell: Add flexibility in QEMU firmware handling * d2fc9dfcd tests/util/grub-shell: Use pflash instead of -bios to load UEFI firmware * 88a7e64c2 tests/util/grub-shell: Print gdbinfo if on EFI platform * b8d29f114 configure: Add Debian/Ubuntu DejaVu font path * 13b315c0a term/ns8250-spcr: Add one more 16550 debug type * 8abec8e15 loader/i386/multiboot_mbi: Fix handling of errors in broken aout-kludge * d35ff2251 net/drivers/ieee1275/ofnet: Remove 200 ms timeout in get_card_packet() to reduce input latency * 86df79275 commands/efi/tpm: Re-enable measurements on confidential computing platforms * 0b4d01794 util/grub-mkpasswd-pbkdf2: Simplify the main function implementation * fa36f6376 kern/ieee1275/init: Add IEEE 1275 Radix support for KVM on Power * c464f1ec3 fs/zfs/zfs: Mark vdev_zaps_v2 and head_errlog as supported * 2ffc14ba9 types: Add missing casts in compile-time byteswaps * c6ac49120 font: Add Fedora-specific font paths * 5e8989e4e fs/bfs: Fix improper grub_free() on non-existing files * c806e4dc8 io/gzio: Properly init a table * 243682baa io/gzio: Abort early when get_byte() reads nothing * bb65d81fe cli_lock: Add build option to block command line interface * 56e58828c fs/erofs: Add tests for EROFS in grub-fs-tester * 9d603061a fs/erofs: Add support for the EROFS * 1ba39de62 safemath: Add ALIGN_UP_OVF() which checks for an overflow * d291449ba docs: Fix spelling mistakes * 6cc2e4481 util/grub.d/00_header.in: Quote background image pathname in output * f456add5f disk/lvm: GRUB fails to detect LVM volumes due to an incorrect computation of mda_end * 386b59ddb disk/cryptodisk: Allow user to retry failed passphrase * 99b4c0c38 disk/mdraid1x_linux: Prevent infinite recursion * b272ed230 efi: Fix stack protector issues * 6744840b1 build: Track explicit module dependencies in Makefile.core.def Signed-off-by: Leah Rowe <leah@libreboot.org>
Diffstat (limited to 'config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch')
-rw-r--r--config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch1074
1 files changed, 1074 insertions, 0 deletions
diff --git a/config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch b/config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch
new file mode 100644
index 00000000..82452f1c
--- /dev/null
+++ b/config/grub/xhci/patches/0022-Add-native-NVMe-driver-based-on-SeaBIOS.patch
@@ -0,0 +1,1074 @@
+From 394102db8f4de6782b628b29c59d2634f2c72674 Mon Sep 17 00:00:00 2001
+From: Mate Kukri <km@mkukri.xyz>
+Date: Mon, 20 May 2024 11:43:35 +0100
+Subject: [PATCH 22/22] Add native NVMe driver based on SeaBIOS
+
+Tested to successfully boot Debian on QEMU and OptiPlex 3050.
+
+Signed-off-by: Mate Kukri <km@mkukri.xyz>
+---
+ Makefile.am | 2 +-
+ grub-core/Makefile.core.def | 6 +
+ grub-core/commands/nativedisk.c | 1 +
+ grub-core/disk/nvme-int.h | 208 +++++++++
+ grub-core/disk/nvme.c | 781 ++++++++++++++++++++++++++++++++
+ include/grub/disk.h | 1 +
+ 6 files changed, 998 insertions(+), 1 deletion(-)
+ create mode 100644 grub-core/disk/nvme-int.h
+ create mode 100644 grub-core/disk/nvme.c
+
+diff --git a/Makefile.am b/Makefile.am
+index 65016f856..7bc0866ba 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -434,7 +434,7 @@ if COND_i386_coreboot
+ FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst)
+ default_payload.elf: grub-mkstandalone grub-mkimage FORCE
+ test -f $@ && rm $@ || true
+- pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
++ pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
+ endif
+
+ endif
+diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def
+index 9d59acd1e..56076728b 100644
+--- a/grub-core/Makefile.core.def
++++ b/grub-core/Makefile.core.def
+@@ -2621,3 +2621,9 @@ module = {
+ enable = efi;
+ depends = part_gpt;
+ };
++
++module = {
++ name = nvme;
++ common = disk/nvme.c;
++ enable = pci;
++};
+diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c
+index 6806bff9c..fd68a513e 100644
+--- a/grub-core/commands/nativedisk.c
++++ b/grub-core/commands/nativedisk.c
+@@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative)
+ case GRUB_DISK_DEVICE_ATA_ID:
+ case GRUB_DISK_DEVICE_SCSI_ID:
+ case GRUB_DISK_DEVICE_XEN:
++ case GRUB_DISK_DEVICE_NVME_ID:
+ if (getnative)
+ break;
+ /* FALLTHROUGH */
+diff --git a/grub-core/disk/nvme-int.h b/grub-core/disk/nvme-int.h
+new file mode 100644
+index 000000000..1295b58aa
+--- /dev/null
++++ b/grub-core/disk/nvme-int.h
+@@ -0,0 +1,208 @@
++// NVMe datastructures and constants
++//
++// Copyright 2017 Amazon.com, Inc. or its affiliates.
++//
++// This file may be distributed under the terms of the GNU LGPLv3 license.
++
++#ifndef __NVME_INT_H
++#define __NVME_INT_H
++
++#include <grub/types.h>
++
++/* Data structures */
++
++/* The register file of a NVMe host controller. This struct follows the naming
++ scheme in the NVMe specification. */
++struct nvme_reg {
++ grub_uint64_t cap; /* controller capabilities */
++ grub_uint32_t vs; /* version */
++ grub_uint32_t intms; /* interrupt mask set */
++ grub_uint32_t intmc; /* interrupt mask clear */
++ grub_uint32_t cc; /* controller configuration */
++ grub_uint32_t _res0;
++ grub_uint32_t csts; /* controller status */
++ grub_uint32_t _res1;
++ grub_uint32_t aqa; /* admin queue attributes */
++ grub_uint64_t asq; /* admin submission queue base address */
++ grub_uint64_t acq; /* admin completion queue base address */
++};
++
++/* Submission queue entry */
++struct nvme_sqe {
++ union {
++ grub_uint32_t dword[16];
++ struct {
++ grub_uint32_t cdw0; /* Command DWORD 0 */
++ grub_uint32_t nsid; /* Namespace ID */
++ grub_uint64_t _res0;
++ grub_uint64_t mptr; /* metadata ptr */
++
++ grub_uint64_t dptr_prp1;
++ grub_uint64_t dptr_prp2;
++ };
++ };
++};
++
++/* Completion queue entry */
++struct nvme_cqe {
++ union {
++ grub_uint32_t dword[4];
++ struct {
++ grub_uint32_t cdw0;
++ grub_uint32_t _res0;
++ grub_uint16_t sq_head;
++ grub_uint16_t sq_id;
++ grub_uint16_t cid;
++ grub_uint16_t status;
++ };
++ };
++};
++
++/* The common part of every submission or completion queue. */
++struct nvme_queue {
++ grub_uint32_t *dbl; /* doorbell */
++ grub_uint16_t mask; /* length - 1 */
++};
++
++struct nvme_cq {
++ struct nvme_queue common;
++ struct nvme_cqe *cqe;
++
++ /* We have read upto (but not including) this entry in the queue. */
++ grub_uint16_t head;
++
++ /* The current phase bit the controller uses to indicate that it has written
++ a new entry. This is inverted after each wrap. */
++ unsigned phase : 1;
++};
++
++struct nvme_sq {
++ struct nvme_queue common;
++ struct nvme_sqe *sqe;
++
++ /* Corresponding completion queue. We only support a single SQ per CQ. */
++ struct nvme_cq *cq;
++
++ /* The last entry the controller has fetched. */
++ grub_uint16_t head;
++
++ /* The last value we have written to the tail doorbell. */
++ grub_uint16_t tail;
++};
++
++struct nvme_ctrl {
++ grub_pci_device_t pci;
++ struct nvme_reg volatile *reg;
++
++ grub_uint32_t ctrlnum;
++
++ grub_uint32_t doorbell_stride; /* in bytes */
++
++ struct nvme_sq admin_sq;
++ struct nvme_cq admin_cq;
++
++ grub_uint32_t ns_count;
++
++ struct nvme_sq io_sq;
++ struct nvme_cq io_cq;
++};
++
++struct nvme_namespace {
++ struct nvme_namespace *next;
++ struct nvme_namespace **prev;
++
++ char *devname;
++
++ grub_uint32_t nsnum;
++
++ struct nvme_ctrl *ctrl;
++
++ grub_uint32_t ns_id;
++
++ grub_uint64_t lba_count; /* The total amount of sectors. */
++
++ grub_uint32_t block_size;
++ grub_uint32_t metadata_size;
++ grub_uint32_t max_req_size;
++};
++
++/* Data structures for NVMe admin identify commands */
++
++struct nvme_identify_ctrl {
++ grub_uint16_t vid;
++ grub_uint16_t ssvid;
++ char sn[20];
++ char mn[40];
++ char fr[8];
++
++ grub_uint8_t rab;
++ grub_uint8_t ieee[3];
++ grub_uint8_t cmic;
++ grub_uint8_t mdts;
++
++ char _boring[516 - 78];
++
++ grub_uint32_t nn; /* number of namespaces */
++};
++
++struct nvme_identify_ns_list {
++ grub_uint32_t ns_id[1024];
++};
++
++struct nvme_lba_format {
++ grub_uint16_t ms;
++ grub_uint8_t lbads;
++ grub_uint8_t rp;
++};
++
++struct nvme_identify_ns {
++ grub_uint64_t nsze;
++ grub_uint64_t ncap;
++ grub_uint64_t nuse;
++ grub_uint8_t nsfeat;
++ grub_uint8_t nlbaf;
++ grub_uint8_t flbas;
++
++ char _boring[128 - 27];
++
++ struct nvme_lba_format lbaf[16];
++};
++
++union nvme_identify {
++ struct nvme_identify_ns ns;
++ struct nvme_identify_ctrl ctrl;
++ struct nvme_identify_ns_list ns_list;
++};
++
++/* NVMe constants */
++
++#define NVME_CAP_CSS_NVME (1ULL << 37)
++
++#define NVME_CSTS_FATAL (1U << 1)
++#define NVME_CSTS_RDY (1U << 0)
++
++#define NVME_CC_EN (1U << 0)
++
++#define NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U
++#define NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U
++#define NVME_SQE_OPC_ADMIN_IDENTIFY 6U
++
++#define NVME_SQE_OPC_IO_WRITE 1U
++#define NVME_SQE_OPC_IO_READ 2U
++
++#define NVME_ADMIN_IDENTIFY_CNS_ID_NS 0U
++#define NVME_ADMIN_IDENTIFY_CNS_ID_CTRL 1U
++#define NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U
++
++#define NVME_CQE_DW3_P (1U << 16)
++
++#define NVME_PAGE_SIZE 4096
++#define NVME_PAGE_MASK ~(NVME_PAGE_SIZE - 1)
++
++/* Length for the queue entries. */
++#define NVME_SQE_SIZE_LOG 6
++#define NVME_CQE_SIZE_LOG 4
++
++#endif
++
++/* EOF */
+diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c
+new file mode 100644
+index 000000000..093237c70
+--- /dev/null
++++ b/grub-core/disk/nvme.c
+@@ -0,0 +1,781 @@
++// Low level NVMe disk access
++//
++// Based on SeaBIOS NVMe driver - Copyright 2017 Amazon.com, Inc. or its affiliates.
++// Port to GRUB2 done by Mate Kukri
++//
++// This file may be distributed under the terms of the GNU LGPLv3 license.
++
++#include <grub/disk.h>
++#include <grub/dl.h>
++#include <grub/pci.h>
++#include "nvme-int.h"
++
++GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 in reality but it is GPLv3 compatible */
++
++static grub_uint32_t grub_nvme_ctrlcnt;
++static grub_uint32_t grub_nvme_nscnt;
++
++static struct nvme_namespace *grub_nvme_namespaces;
++
++// Page aligned "dma bounce buffer" of size NVME_PAGE_SIZE
++static void *nvme_dma_buffer;
++
++static void *
++zalloc_page_aligned(grub_uint32_t size)
++{
++ void *res = grub_memalign(NVME_PAGE_SIZE, size);
++ if (res) grub_memset(res, 0, size);
++ return res;
++}
++
++static void
++nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, grub_uint16_t q_idx,
++ grub_uint16_t length)
++{
++ grub_memset(q, 0, sizeof(*q));
++ q->dbl = (grub_uint32_t *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
++ grub_dprintf("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
++ q->mask = length - 1;
++}
++
++static int
++nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, grub_uint16_t length,
++ struct nvme_cq *cq)
++{
++ nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
++ sq->sqe = zalloc_page_aligned(sizeof(*sq->sqe) * length);
++
++ if (!sq->sqe) {
++ return -1;
++ }
++
++ grub_dprintf("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
++ sq->cq = cq;
++ sq->head = 0;
++ sq->tail = 0;
++
++ return 0;
++}
++
++static int
++nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx, grub_uint16_t length)
++{
++ nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
++ cq->cqe = zalloc_page_aligned(sizeof(*cq->cqe) * length);
++ if (!cq->cqe) {
++ return -1;
++ }
++
++ cq->head = 0;
++
++ /* All CQE phase bits are initialized to zero. This means initially we wait
++ for the host controller to set these to 1. */
++ cq->phase = 1;
++
++ return 0;
++}
++
++static int
++nvme_poll_cq(struct nvme_cq *cq)
++{
++ grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3];
++ return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
++}
++
++static int
++nvme_is_cqe_success(struct nvme_cqe const *cqe)
++{
++ return ((cqe->status >> 1) & 0xFF) == 0;
++}
++
++static struct nvme_cqe
++nvme_error_cqe(void)
++{
++ struct nvme_cqe r;
++
++ /* 0xFF is a vendor specific status code != success. Should be okay for
++ indicating failure. */
++ grub_memset(&r, 0xFF, sizeof(r));
++ return r;
++}
++
++static struct nvme_cqe
++nvme_consume_cqe(struct nvme_sq *sq)
++{
++ struct nvme_cq *cq = sq->cq;
++
++ if (!nvme_poll_cq(cq)) {
++ /* Cannot consume a completion queue entry, if there is none ready. */
++ return nvme_error_cqe();
++ }
++
++ struct nvme_cqe *cqe = &cq->cqe[cq->head];
++ grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask;
++ grub_dprintf("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
++ if (cq_next_head < cq->head) {
++ grub_dprintf("nvme", "cq %p wrap\n", cq);
++ cq->phase = ~cq->phase;
++ }
++ cq->head = cq_next_head;
++
++ /* Update the submission queue head. */
++ if (cqe->sq_head != sq->head) {
++ sq->head = cqe->sq_head;
++ grub_dprintf("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head);
++ }
++
++ /* Tell the controller that we consumed the completion. */
++ *(volatile grub_uint32_t *) cq->common.dbl = cq->head;
++
++ return *cqe;
++}
++
++static struct nvme_cqe
++nvme_wait(struct nvme_sq *sq)
++{
++ // static const unsigned nvme_timeout = 5000 /* ms */;
++ // grub_uint32_t to = timer_calc(nvme_timeout);
++ while (!nvme_poll_cq(sq->cq)) {
++ /* FIXME
++ yield();
++
++ if (timer_check(to)) {
++ warn_timeout();
++ return nvme_error_cqe();
++ }*/
++ }
++
++ return nvme_consume_cqe(sq);
++}
++
++/* Returns the next submission queue entry (or NULL if the queue is full). It
++ also fills out Command Dword 0 and clears the rest. */
++static struct nvme_sqe *
++nvme_get_next_sqe(struct nvme_sq *sq, grub_uint8_t opc, void *metadata, void *data, void *data2)
++{
++ if (((sq->head + 1) & sq->common.mask) == sq->tail) {
++ grub_dprintf("nvme", "submission queue is full\n");
++ return NULL;
++ }
++
++ struct nvme_sqe *sqe = &sq->sqe[sq->tail];
++ grub_dprintf("nvme", "sq %p next_sqe %u\n", sq, sq->tail);
++
++ grub_memset(sqe, 0, sizeof(*sqe));
++ sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
++ sqe->mptr = (grub_uint32_t)metadata;
++ sqe->dptr_prp1 = (grub_uint32_t)data;
++ sqe->dptr_prp2 = (grub_uint32_t)data2;
++
++ return sqe;
++}
++
++/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
++static void
++nvme_commit_sqe(struct nvme_sq *sq)
++{
++ grub_dprintf("nvme", "sq %p commit_sqe %u\n", sq, sq->tail);
++ sq->tail = (sq->tail + 1) & sq->common.mask;
++ *(volatile grub_uint32_t *) sq->common.dbl = sq->tail;
++}
++
++/* Perform an identify command on the admin queue and return the resulting
++ buffer. This may be a NULL pointer, if something failed. This function
++ cannot be used after initialization, because it uses buffers in tmp zone. */
++static union nvme_identify *
++nvme_admin_identify(struct nvme_ctrl *ctrl, grub_uint8_t cns, grub_uint32_t nsid)
++{
++ union nvme_identify *identify_buf = zalloc_page_aligned(4096);
++ if (!identify_buf)
++ return NULL;
++
++ struct nvme_sqe *cmd_identify;
++ cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
++ identify_buf, NULL);
++ if (!cmd_identify)
++ goto error;
++
++ cmd_identify->nsid = nsid;
++ cmd_identify->dword[10] = cns;
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ goto error;
++ }
++
++ return identify_buf;
++ error:
++ grub_free(identify_buf);
++ return NULL;
++}
++
++static struct nvme_identify_ctrl *
++nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
++{
++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
++}
++
++static struct nvme_identify_ns *
++nvme_admin_identify_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_id)
++{
++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
++ ns_id)->ns;
++}
++
++static void
++nvme_probe_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_idx, grub_uint8_t mdts)
++{
++ grub_uint32_t ns_id = ns_idx + 1;
++
++ struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
++ if (!id) {
++ grub_dprintf("nvme", "NVMe couldn't identify namespace %u.\n", ns_id);
++ goto free_buffer;
++ }
++
++ grub_uint8_t current_lba_format = id->flbas & 0xF;
++ if (current_lba_format > id->nlbaf) {
++ grub_dprintf("nvme", "NVMe NS %u: current LBA format %u is beyond what the "
++ " namespace supports (%u)?\n",
++ ns_id, current_lba_format, id->nlbaf + 1);
++ goto free_buffer;
++ }
++
++ if (!id->nsze) {
++ grub_dprintf("nvme", "NVMe NS %u is inactive.\n", ns_id);
++ goto free_buffer;
++ }
++
++ if (!nvme_dma_buffer) {
++ nvme_dma_buffer = zalloc_page_aligned(NVME_PAGE_SIZE);
++ if (!nvme_dma_buffer) {
++ goto free_buffer;
++ }
++ }
++
++ struct nvme_namespace *ns = grub_malloc(sizeof(*ns));
++ if (!ns) {
++ goto free_buffer;
++ }
++ grub_memset(ns, 0, sizeof(*ns));
++ ns->ctrl = ctrl;
++ ns->ns_id = ns_id;
++ ns->lba_count = id->nsze;
++
++ struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];
++
++ ns->block_size = 1U << fmt->lbads;
++ ns->metadata_size = fmt->ms;
++
++ if (ns->block_size > NVME_PAGE_SIZE) {
++ /* If we see devices that trigger this path, we need to increase our
++ buffer size. */
++ grub_free(ns);
++ goto free_buffer;
++ }
++
++ if (mdts) {
++ ns->max_req_size = ((1U << mdts) * NVME_PAGE_SIZE) / ns->block_size;
++ grub_dprintf("nvme", "NVME NS %u max request size: %d sectors\n",
++ ns_id, ns->max_req_size);
++ } else {
++ ns->max_req_size = -1U;
++ }
++
++ ns->devname = grub_xasprintf("nvme%un%u", ctrl->ctrlnum, ns_id);
++ ns->nsnum = grub_nvme_nscnt++;
++
++ grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns));
++
++free_buffer:
++ grub_free(id);
++}
++
++
++/* Release memory allocated for a completion queue */
++static void
++nvme_destroy_cq(struct nvme_cq *cq)
++{
++ grub_free(cq->cqe);
++ cq->cqe = NULL;
++}
++
++/* Release memory allocated for a submission queue */
++static void
++nvme_destroy_sq(struct nvme_sq *sq)
++{
++ grub_free(sq->sqe);
++ sq->sqe = NULL;
++}
++
++/* Returns 0 on success. */
++static int
++nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx)
++{
++ int rc;
++ struct nvme_sqe *cmd_create_cq;
++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);
++
++ rc = nvme_init_cq(ctrl, cq, q_idx, length);
++ if (rc) {
++ goto err;
++ }
++
++ cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
++ cq->cqe, NULL);
++ if (!cmd_create_cq) {
++ goto err_destroy_cq;
++ }
++
++ cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
++ cmd_create_cq->dword[11] = 1 /* physically contiguous */;
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "create io cq failed: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++
++ goto err_destroy_cq;
++ }
++
++ return 0;
++
++err_destroy_cq:
++ nvme_destroy_cq(cq);
++err:
++ return -1;
++}
++
++/* Returns 0 on success. */
++static int
++nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, struct nvme_cq *cq)
++{
++ int rc;
++ struct nvme_sqe *cmd_create_sq;
++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);
++
++ rc = nvme_init_sq(ctrl, sq, q_idx, length, cq);
++ if (rc) {
++ goto err;
++ }
++
++ cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
++ sq->sqe, NULL);
++ if (!cmd_create_sq) {
++ goto err_destroy_sq;
++ }
++
++ cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
++ cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */;
++ grub_dprintf("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq,
++ cmd_create_sq->dword[10], cmd_create_sq->dword[11]);
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "create io sq failed: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++ goto err_destroy_sq;
++ }
++
++ return 0;
++
++err_destroy_sq:
++ nvme_destroy_sq(sq);
++err:
++ return -1;
++}
++
++/* Reads count sectors into buf. The buffer cannot cross page boundaries. */
++static int
++nvme_io_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *prp1, void *prp2,
++ grub_uint16_t count, int write)
++{
++ if (((grub_uint32_t)prp1 & 0x3) || ((grub_uint32_t)prp2 & 0x3)) {
++ /* Buffer is misaligned */
++ return -1;
++ }
++
++ struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq,
++ write ? NVME_SQE_OPC_IO_WRITE
++ : NVME_SQE_OPC_IO_READ,
++ NULL, prp1, prp2);
++ io_read->nsid = ns->ns_id;
++ io_read->dword[10] = (grub_uint32_t)lba;
++ io_read->dword[11] = (grub_uint32_t)(lba >> 32);
++ io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1);
++
++ nvme_commit_sqe(&ns->ctrl->io_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "read io: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++
++ return -1;
++ }
++
++ grub_dprintf("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id, write ? "write" : "read",
++ lba, count);
++ return count;
++}
++
++// Transfer up to one page of data using the internal dma bounce buffer
++static int
++nvme_bounce_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count,
++ int write)
++{
++ grub_uint16_t const max_blocks = NVME_PAGE_SIZE / ns->block_size;
++ grub_uint16_t blocks = count < max_blocks ? count : max_blocks;
++
++ if (write)
++ grub_memcpy(nvme_dma_buffer, buf, blocks * ns->block_size);
++
++ int res = nvme_io_xfer(ns, lba, nvme_dma_buffer, NULL, blocks, write);
++
++ if (!write && res >= 0)
++ grub_memcpy(buf, nvme_dma_buffer, res * ns->block_size);
++
++ return res;
++}
++
++#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */
++
++// Transfer data using page list (if applicable)
++static int
++nvme_prpl_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count,
++ int write)
++{
++ grub_uint32_t base = (long)buf;
++ grub_int32_t size;
++
++ if (count > ns->max_req_size)
++ count = ns->max_req_size;
++
++ size = count * ns->block_size;
++ /* Special case for transfers that fit into PRP1, but are unaligned */
++ if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE))
++ goto single;
++
++ /* Every request has to be page aligned */
++ if (base & ~NVME_PAGE_MASK)
++ goto bounce;
++
++ /* Make sure a full block fits into the last chunk */
++ if (size & (ns->block_size - 1ULL))
++ goto bounce;
++
++ /* Build PRP list if we need to describe more than 2 pages */
++ if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) {
++ grub_uint32_t prpl_len = 0;
++ grub_uint64_t *prpl = nvme_dma_buffer;
++ int first_page = 1;
++ for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) {
++ if (first_page) {
++ /* First page is special */
++ first_page = 0;
++ continue;
++ }
++ if (prpl_len >= NVME_MAX_PRPL_ENTRIES)
++ goto bounce;
++ prpl[prpl_len++] = base;
++ }
++ return nvme_io_xfer(ns, lba, buf, prpl, count, write);
++ }
++
++ /* Directly embed the 2nd page if we only need 2 pages */
++ if ((ns->block_size * count) > NVME_PAGE_SIZE)
++ return nvme_io_xfer(ns, lba, buf, (char *) buf + NVME_PAGE_SIZE, count, write);
++
++single:
++ /* One page is enough, don't expose anything else */
++ return nvme_io_xfer(ns, lba, buf, NULL, count, write);
++
++bounce:
++ /* Use bounce buffer to make transfer */
++ return nvme_bounce_xfer(ns, lba, buf, count, write);
++}
++
++static int
++nvme_create_io_queues(struct nvme_ctrl *ctrl)
++{
++ if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
++ goto err;
++
++ if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
++ goto err_free_cq;
++
++ return 0;
++
++ err_free_cq:
++ nvme_destroy_cq(&ctrl->io_cq);
++ err:
++ return -1;
++}
++
++/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
++static int
++nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
++{
++ // grub_uint32_t const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
++ // grub_uint32_t to = timer_calc(max_to);
++ grub_uint32_t csts;
++
++ while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
++ // FIXME
++ //yield();
++
++ if (csts & NVME_CSTS_FATAL) {
++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n");
++ return -1;
++ }
++
++ /*
++ if (timer_check(to)) {
++ warn_timeout();
++ return -1;
++ }*/
++ }
++
++ return 0;
++}
++
++/* Returns 0 on success. */
++static int grub_nvme_controller_enable(struct nvme_ctrl *ctrl)
++{
++ grub_pci_address_t addr;
++ int rc;
++
++ addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND);
++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_BUS_MASTER);
++
++ /* Turn the controller off. */
++ ctrl->reg->cc = 0;
++ if (nvme_wait_csts_rdy(ctrl, 0)) {
++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n");
++ return -1;
++ }
++
++ ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);
++
++ rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
++ NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
++ if (rc) {
++ return -1;
++ }
++
++ rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
++ NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
++ if (rc) {
++ goto err_destroy_admin_cq;
++ }
++
++ ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
++ | ctrl->admin_sq.common.mask;
++
++ ctrl->reg->asq = (grub_uint32_t)ctrl->admin_sq.sqe;
++ ctrl->reg->acq = (grub_uint32_t)ctrl->admin_cq.cqe;
++
++ grub_dprintf("nvme", " admin submission queue: %p\n", ctrl->admin_sq.sqe);
++ grub_dprintf("nvme", " admin completion queue: %p\n", ctrl->admin_cq.cqe);
++
++ ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20)
++ | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);
++
++ if (nvme_wait_csts_rdy(ctrl, 1)) {
++ grub_dprintf("nvme", "NVMe fatal error while enabling controller\n");
++ goto err_destroy_admin_sq;
++ }
++
++ /* The admin queue is set up and the controller is ready. Let's figure out
++ what namespaces we have. */
++
++ struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);
++
++ if (!identify) {
++ grub_dprintf("nvme", "NVMe couldn't identify controller.\n");
++ goto err_destroy_admin_sq;
++ }
++
++ grub_dprintf("nvme", "NVMe has %u namespace%s.\n",
++ identify->nn, (identify->nn == 1) ? "" : "s");
++
++ ctrl->ns_count = identify->nn;
++ grub_uint8_t mdts = identify->mdts;
++ grub_free(identify);
++
++ if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
++ /* No point to continue, if the controller says it doesn't have
++ namespaces or we couldn't create I/O queues. */
++ goto err_destroy_admin_sq;
++ }
++
++ /* Give the controller a global number */
++ ctrl->ctrlnum = grub_nvme_ctrlcnt++;
++
++ /* Populate namespace IDs */
++ for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
++ nvme_probe_ns(ctrl, ns_idx, mdts);
++ }
++
++ grub_dprintf("nvme", "NVMe initialization complete!\n");
++ return 0;
++
++ err_destroy_admin_sq:
++ nvme_destroy_sq(&ctrl->admin_sq);
++ err_destroy_admin_cq:
++ nvme_destroy_cq(&ctrl->admin_cq);
++ return -1;
++}
++
++static int grub_nvme_pci_probe(grub_pci_device_t dev, grub_pci_id_t pciid __attribute__ ((unused)), void *data __attribute__ ((unused)))
++{
++ grub_pci_address_t addr;
++ grub_uint32_t class, bar, version;
++ struct nvme_reg volatile *reg;
++
++ class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS));
++ if (class >> 16 != 0x0108)
++ return 0;
++ if ((class >> 8 & 0xff) != 2) { /* as of NVM 1.0e */
++ grub_dprintf("nvme", "Found incompatble NVMe: prog-if=%02x\n", class >> 8 & 0xff);
++ return 0;
++ }
++
++ bar = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0));
++ reg = grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK, sizeof (*reg));
++
++ addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND);
++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_MEM_ENABLED);
++
++ version = reg->vs;
++ grub_dprintf("nvme", "Found NVMe controller with version %u.%u.%u.\n", version >> 16, (version >> 8) & 0xFF, version & 0xFF);
++ grub_dprintf("nvme", " Capabilities %016llx\n", reg->cap);
++
++ if (~reg->cap & NVME_CAP_CSS_NVME) {
++ grub_dprintf("nvme", "Controller doesn't speak NVMe command set. Skipping.\n");
++ goto err;
++ }
++
++ struct nvme_ctrl *ctrl = grub_malloc(sizeof(*ctrl));
++ if (!ctrl)
++ goto err;
++
++ grub_memset(ctrl, 0, sizeof(*ctrl));
++
++ ctrl->reg = reg;
++ ctrl->pci = dev;
++
++ if (grub_nvme_controller_enable(ctrl))
++ goto err_free_ctrl;
++
++ return 0;
++
++ err_free_ctrl:
++ grub_free(ctrl);
++ err:
++ grub_dprintf("nvme", "Failed to enable NVMe controller.\n");
++ return 0;
++}
++
++static int
++grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data, grub_disk_pull_t pull)
++{
++ struct nvme_namespace *ns;
++
++ if (pull != GRUB_DISK_PULL_NONE)
++ return 0;
++
++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces)
++ if (hook (ns->devname, hook_data))
++ return 1;
++
++ return 0;
++}
++
++static grub_err_t
++grub_nvme_open (const char *name __attribute ((unused)), grub_disk_t disk __attribute ((unused)))
++{
++ struct nvme_namespace *ns;
++
++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces)
++ if (grub_strcmp (ns->devname, name) == 0)
++ break;
++
++ if (! ns)
++ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
++
++ disk->total_sectors = ns->lba_count;
++ disk->max_agglomerate = ns->max_req_size;
++
++ disk->id = ns->nsnum; /* global id of the namespace */
++
++ disk->data = ns;
++
++ return 0;
++}
++
++static grub_err_t
++nvme_readwrite(struct nvme_namespace *ns, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf, int write)
++{
++ for (int i = 0; i < num_sectors;) {
++ grub_uint16_t blocks_remaining = num_sectors - i;
++ char *op_buf = buf + i * ns->block_size;
++ int blocks = nvme_prpl_xfer(ns, sector + i, op_buf, blocks_remaining, write);
++ if (blocks < 0)
++ return GRUB_ERR_IO;
++ i += blocks;
++ }
++ return GRUB_ERR_NONE;
++}
++
++static grub_err_t
++grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf)
++{
++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 0);
++}
++
++static grub_err_t
++grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, const char *buf)
++{
++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 1);
++}
++
++static struct grub_disk_dev grub_nvme_dev =
++ {
++ .name = "nvme",
++ .id = GRUB_DISK_DEVICE_NVME_ID,
++ .disk_iterate = grub_nvme_iterate,
++ .disk_open = grub_nvme_open,
++ .disk_read = grub_nvme_read,
++ .disk_write = grub_nvme_write,
++ .next = 0
++ };
++
++GRUB_MOD_INIT(nvme)
++{
++ grub_stop_disk_firmware ();
++ grub_pci_iterate (grub_nvme_pci_probe, NULL);
++ grub_disk_dev_register (&grub_nvme_dev);
++}
++
++GRUB_MOD_FINI(nvme)
++{
++ grub_disk_dev_unregister (&grub_nvme_dev);
++}
+diff --git a/include/grub/disk.h b/include/grub/disk.h
+index fbf23df7f..186e76f0b 100644
+--- a/include/grub/disk.h
++++ b/include/grub/disk.h
+@@ -52,6 +52,7 @@ enum grub_disk_dev_id
+ GRUB_DISK_DEVICE_UBOOTDISK_ID,
+ GRUB_DISK_DEVICE_XEN,
+ GRUB_DISK_DEVICE_OBDISK_ID,
++ GRUB_DISK_DEVICE_NVME_ID
+ };
+
+ struct grub_disk;
+--
+2.39.2
+