summaryrefslogtreecommitdiff
path: root/config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch
diff options
context:
space:
mode:
authorLeah Rowe <leah@libreboot.org>2024-06-01 23:01:30 +0100
committerLeah Rowe <leah@libreboot.org>2024-06-02 19:58:50 +0100
commit429e91f90894d30bc2c6e165d6f2a743c61b76f3 (patch)
tree67b347fab1b896ab95c38303b5e124eab16ece62 /config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch
parent9daf7f05f14c74189daa820eae0127641c4d11d4 (diff)
make GRUB multi-tree and re-add xhci patches
Re-add xHCI only on haswell and broadwell machines, where they are needed. Otherwise, keep the same GRUB code. The xHCI patches were removed because they caused issues on Sandybridge-based Dell Latitude laptops. See: https://codeberg.org/libreboot/lbmk/issues/216 The issue was not reported elsewhere, including on the Haswell/Broadwell hardware where they are needed, but the build system could only build one version of GRUB. The older machines do not need xHCI patches, because they either do not have xHCI patches, or work (in GRUB) because they're in EHCI mode when running the payload. So, the problem is that we need the xHCI patches for GRUB on Haswell/Broadwell hardware, but the patches break Sandybridge hardware, and we only had the one build of GRUB. To mitigate this problem, the build system now supports building multiple revisions of GRUB, with different patches, and each given coreboot target can say which GRUB tree to use by setting this in target.cfg: grubtree="xhci" In the above example, the "xhci" tree would be used. Some generic GRUB config has been moved to config/data/grub/ and config/grub/ now looks like config/coreboot/ - also, the grub.cfg file (named "payload" in each tree) is copied to the GRUB source tree as ".config", then added to GRUB's memdisk in the same way, as grub.cfg. Several other design changes had to be made because of this: * grub.cfg in memdisk no longer automatically jumps to one in CBFS, but now shows a menuentry for it if available * Certain commands in script/trees are disabled for GRUB, such as *config make commands. * gnulib is now defined in config/submodule/grub/, instead of config/git/grub - and this mitigates an existing bug where downloading gnulib first would make grub no longer possible to download in lbmk. The coreboot option CONFIG_FINALIZE_USB_ROUTE_XHCI has been re-enabled on: Dell OptiPlex 9020 MT, Dell OptiPlex 9020 SFF, Lenovo ThinkPad T440p and Lenovo ThinkPad W541 - now USB should work again in GRUB. The GRUB payload has been re-enabled on HP EliteBook 820 G2. This change will enable per-board GRUB optimisation in the future. For example, we hardcode what partitions and LVMs GRUB scans because * is slow on ICH7-based machines, due to GRUB's design. On other machines, * is reasonably fast, for automatically enumerating the list of devices for boot. Use of * (and other wildcards) could enable our GRUB payload to automatically boot more distros, with minimal fuss. This can be done at a later date, in subsequent revisions. Signed-off-by: Leah Rowe <leah@libreboot.org>
Diffstat (limited to 'config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch')
-rw-r--r--config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch1074
1 files changed, 1074 insertions, 0 deletions
diff --git a/config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch b/config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch
new file mode 100644
index 00000000..4345ebae
--- /dev/null
+++ b/config/grub/default/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch
@@ -0,0 +1,1074 @@
+From 708c0092d44aec6409e0ecc1925cdbb3b76c6dfd Mon Sep 17 00:00:00 2001
+From: Mate Kukri <km@mkukri.xyz>
+Date: Mon, 20 May 2024 11:43:35 +0100
+Subject: [PATCH 1/1] Add native NVMe driver based on SeaBIOS
+
+Tested to successfully boot Debian on QEMU and OptiPlex 3050.
+
+Signed-off-by: Mate Kukri <km@mkukri.xyz>
+---
+ Makefile.am | 2 +-
+ grub-core/Makefile.core.def | 6 +
+ grub-core/commands/nativedisk.c | 1 +
+ grub-core/disk/nvme-int.h | 208 +++++++++
+ grub-core/disk/nvme.c | 781 ++++++++++++++++++++++++++++++++
+ include/grub/disk.h | 1 +
+ 6 files changed, 998 insertions(+), 1 deletion(-)
+ create mode 100644 grub-core/disk/nvme-int.h
+ create mode 100644 grub-core/disk/nvme.c
+
+diff --git a/Makefile.am b/Makefile.am
+index 43635d5ff..2c86dbbf6 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -434,7 +434,7 @@ if COND_i386_coreboot
+ FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst)
+ default_payload.elf: grub-mkstandalone grub-mkimage FORCE
+ test -f $@ && rm $@ || true
+- pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
++ pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg
+ endif
+
+ endif
+diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def
+index 560c66447..d3bf9f1c5 100644
+--- a/grub-core/Makefile.core.def
++++ b/grub-core/Makefile.core.def
+@@ -2607,3 +2607,9 @@ module = {
+ efi = commands/bli.c;
+ enable = efi;
+ };
++
++module = {
++ name = nvme;
++ common = disk/nvme.c;
++ enable = pci;
++};
+diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c
+index 6806bff9c..fd68a513e 100644
+--- a/grub-core/commands/nativedisk.c
++++ b/grub-core/commands/nativedisk.c
+@@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative)
+ case GRUB_DISK_DEVICE_ATA_ID:
+ case GRUB_DISK_DEVICE_SCSI_ID:
+ case GRUB_DISK_DEVICE_XEN:
++ case GRUB_DISK_DEVICE_NVME_ID:
+ if (getnative)
+ break;
+ /* FALLTHROUGH */
+diff --git a/grub-core/disk/nvme-int.h b/grub-core/disk/nvme-int.h
+new file mode 100644
+index 000000000..1295b58aa
+--- /dev/null
++++ b/grub-core/disk/nvme-int.h
+@@ -0,0 +1,208 @@
++// NVMe datastructures and constants
++//
++// Copyright 2017 Amazon.com, Inc. or its affiliates.
++//
++// This file may be distributed under the terms of the GNU LGPLv3 license.
++
++#ifndef __NVME_INT_H
++#define __NVME_INT_H
++
++#include <grub/types.h>
++
++/* Data structures */
++
++/* The register file of a NVMe host controller. This struct follows the naming
++ scheme in the NVMe specification. */
++struct nvme_reg {
++ grub_uint64_t cap; /* controller capabilities */
++ grub_uint32_t vs; /* version */
++ grub_uint32_t intms; /* interrupt mask set */
++ grub_uint32_t intmc; /* interrupt mask clear */
++ grub_uint32_t cc; /* controller configuration */
++ grub_uint32_t _res0;
++ grub_uint32_t csts; /* controller status */
++ grub_uint32_t _res1;
++ grub_uint32_t aqa; /* admin queue attributes */
++ grub_uint64_t asq; /* admin submission queue base address */
++ grub_uint64_t acq; /* admin completion queue base address */
++};
++
++/* Submission queue entry */
++struct nvme_sqe {
++ union {
++ grub_uint32_t dword[16];
++ struct {
++ grub_uint32_t cdw0; /* Command DWORD 0 */
++ grub_uint32_t nsid; /* Namespace ID */
++ grub_uint64_t _res0;
++ grub_uint64_t mptr; /* metadata ptr */
++
++ grub_uint64_t dptr_prp1;
++ grub_uint64_t dptr_prp2;
++ };
++ };
++};
++
++/* Completion queue entry */
++struct nvme_cqe {
++ union {
++ grub_uint32_t dword[4];
++ struct {
++ grub_uint32_t cdw0;
++ grub_uint32_t _res0;
++ grub_uint16_t sq_head;
++ grub_uint16_t sq_id;
++ grub_uint16_t cid;
++ grub_uint16_t status;
++ };
++ };
++};
++
++/* The common part of every submission or completion queue. */
++struct nvme_queue {
++ grub_uint32_t *dbl; /* doorbell */
++ grub_uint16_t mask; /* length - 1 */
++};
++
++struct nvme_cq {
++ struct nvme_queue common;
++ struct nvme_cqe *cqe;
++
++ /* We have read upto (but not including) this entry in the queue. */
++ grub_uint16_t head;
++
++ /* The current phase bit the controller uses to indicate that it has written
++ a new entry. This is inverted after each wrap. */
++ unsigned phase : 1;
++};
++
++struct nvme_sq {
++ struct nvme_queue common;
++ struct nvme_sqe *sqe;
++
++ /* Corresponding completion queue. We only support a single SQ per CQ. */
++ struct nvme_cq *cq;
++
++ /* The last entry the controller has fetched. */
++ grub_uint16_t head;
++
++ /* The last value we have written to the tail doorbell. */
++ grub_uint16_t tail;
++};
++
++struct nvme_ctrl {
++ grub_pci_device_t pci;
++ struct nvme_reg volatile *reg;
++
++ grub_uint32_t ctrlnum;
++
++ grub_uint32_t doorbell_stride; /* in bytes */
++
++ struct nvme_sq admin_sq;
++ struct nvme_cq admin_cq;
++
++ grub_uint32_t ns_count;
++
++ struct nvme_sq io_sq;
++ struct nvme_cq io_cq;
++};
++
++struct nvme_namespace {
++ struct nvme_namespace *next;
++ struct nvme_namespace **prev;
++
++ char *devname;
++
++ grub_uint32_t nsnum;
++
++ struct nvme_ctrl *ctrl;
++
++ grub_uint32_t ns_id;
++
++ grub_uint64_t lba_count; /* The total amount of sectors. */
++
++ grub_uint32_t block_size;
++ grub_uint32_t metadata_size;
++ grub_uint32_t max_req_size;
++};
++
++/* Data structures for NVMe admin identify commands */
++
++struct nvme_identify_ctrl {
++ grub_uint16_t vid;
++ grub_uint16_t ssvid;
++ char sn[20];
++ char mn[40];
++ char fr[8];
++
++ grub_uint8_t rab;
++ grub_uint8_t ieee[3];
++ grub_uint8_t cmic;
++ grub_uint8_t mdts;
++
++ char _boring[516 - 78];
++
++ grub_uint32_t nn; /* number of namespaces */
++};
++
++struct nvme_identify_ns_list {
++ grub_uint32_t ns_id[1024];
++};
++
++struct nvme_lba_format {
++ grub_uint16_t ms;
++ grub_uint8_t lbads;
++ grub_uint8_t rp;
++};
++
++struct nvme_identify_ns {
++ grub_uint64_t nsze;
++ grub_uint64_t ncap;
++ grub_uint64_t nuse;
++ grub_uint8_t nsfeat;
++ grub_uint8_t nlbaf;
++ grub_uint8_t flbas;
++
++ char _boring[128 - 27];
++
++ struct nvme_lba_format lbaf[16];
++};
++
++union nvme_identify {
++ struct nvme_identify_ns ns;
++ struct nvme_identify_ctrl ctrl;
++ struct nvme_identify_ns_list ns_list;
++};
++
++/* NVMe constants */
++
++#define NVME_CAP_CSS_NVME (1ULL << 37)
++
++#define NVME_CSTS_FATAL (1U << 1)
++#define NVME_CSTS_RDY (1U << 0)
++
++#define NVME_CC_EN (1U << 0)
++
++#define NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U
++#define NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U
++#define NVME_SQE_OPC_ADMIN_IDENTIFY 6U
++
++#define NVME_SQE_OPC_IO_WRITE 1U
++#define NVME_SQE_OPC_IO_READ 2U
++
++#define NVME_ADMIN_IDENTIFY_CNS_ID_NS 0U
++#define NVME_ADMIN_IDENTIFY_CNS_ID_CTRL 1U
++#define NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U
++
++#define NVME_CQE_DW3_P (1U << 16)
++
++#define NVME_PAGE_SIZE 4096
++#define NVME_PAGE_MASK ~(NVME_PAGE_SIZE - 1)
++
++/* Length for the queue entries. */
++#define NVME_SQE_SIZE_LOG 6
++#define NVME_CQE_SIZE_LOG 4
++
++#endif
++
++/* EOF */
+diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c
+new file mode 100644
+index 000000000..093237c70
+--- /dev/null
++++ b/grub-core/disk/nvme.c
+@@ -0,0 +1,781 @@
++// Low level NVMe disk access
++//
++// Based on SeaBIOS NVMe driver - Copyright 2017 Amazon.com, Inc. or its affiliates.
++// Port to GRUB2 done by Mate Kukri
++//
++// This file may be distributed under the terms of the GNU LGPLv3 license.
++
++#include <grub/disk.h>
++#include <grub/dl.h>
++#include <grub/pci.h>
++#include "nvme-int.h"
++
++GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 in reality but it is GPLv3 compatible */
++
++static grub_uint32_t grub_nvme_ctrlcnt;
++static grub_uint32_t grub_nvme_nscnt;
++
++static struct nvme_namespace *grub_nvme_namespaces;
++
++// Page aligned "dma bounce buffer" of size NVME_PAGE_SIZE
++static void *nvme_dma_buffer;
++
++static void *
++zalloc_page_aligned(grub_uint32_t size)
++{
++ void *res = grub_memalign(NVME_PAGE_SIZE, size);
++ if (res) grub_memset(res, 0, size);
++ return res;
++}
++
++static void
++nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, grub_uint16_t q_idx,
++ grub_uint16_t length)
++{
++ grub_memset(q, 0, sizeof(*q));
++ q->dbl = (grub_uint32_t *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
++ grub_dprintf("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
++ q->mask = length - 1;
++}
++
++static int
++nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, grub_uint16_t length,
++ struct nvme_cq *cq)
++{
++ nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
++ sq->sqe = zalloc_page_aligned(sizeof(*sq->sqe) * length);
++
++ if (!sq->sqe) {
++ return -1;
++ }
++
++ grub_dprintf("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
++ sq->cq = cq;
++ sq->head = 0;
++ sq->tail = 0;
++
++ return 0;
++}
++
++static int
++nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx, grub_uint16_t length)
++{
++ nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
++ cq->cqe = zalloc_page_aligned(sizeof(*cq->cqe) * length);
++ if (!cq->cqe) {
++ return -1;
++ }
++
++ cq->head = 0;
++
++ /* All CQE phase bits are initialized to zero. This means initially we wait
++ for the host controller to set these to 1. */
++ cq->phase = 1;
++
++ return 0;
++}
++
++static int
++nvme_poll_cq(struct nvme_cq *cq)
++{
++ grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3];
++ return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
++}
++
++static int
++nvme_is_cqe_success(struct nvme_cqe const *cqe)
++{
++ return ((cqe->status >> 1) & 0xFF) == 0;
++}
++
++static struct nvme_cqe
++nvme_error_cqe(void)
++{
++ struct nvme_cqe r;
++
++ /* 0xFF is a vendor specific status code != success. Should be okay for
++ indicating failure. */
++ grub_memset(&r, 0xFF, sizeof(r));
++ return r;
++}
++
++static struct nvme_cqe
++nvme_consume_cqe(struct nvme_sq *sq)
++{
++ struct nvme_cq *cq = sq->cq;
++
++ if (!nvme_poll_cq(cq)) {
++ /* Cannot consume a completion queue entry, if there is none ready. */
++ return nvme_error_cqe();
++ }
++
++ struct nvme_cqe *cqe = &cq->cqe[cq->head];
++ grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask;
++ grub_dprintf("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
++ if (cq_next_head < cq->head) {
++ grub_dprintf("nvme", "cq %p wrap\n", cq);
++ cq->phase = ~cq->phase;
++ }
++ cq->head = cq_next_head;
++
++ /* Update the submission queue head. */
++ if (cqe->sq_head != sq->head) {
++ sq->head = cqe->sq_head;
++ grub_dprintf("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head);
++ }
++
++ /* Tell the controller that we consumed the completion. */
++ *(volatile grub_uint32_t *) cq->common.dbl = cq->head;
++
++ return *cqe;
++}
++
++static struct nvme_cqe
++nvme_wait(struct nvme_sq *sq)
++{
++ // static const unsigned nvme_timeout = 5000 /* ms */;
++ // grub_uint32_t to = timer_calc(nvme_timeout);
++ while (!nvme_poll_cq(sq->cq)) {
++ /* FIXME
++ yield();
++
++ if (timer_check(to)) {
++ warn_timeout();
++ return nvme_error_cqe();
++ }*/
++ }
++
++ return nvme_consume_cqe(sq);
++}
++
++/* Returns the next submission queue entry (or NULL if the queue is full). It
++ also fills out Command Dword 0 and clears the rest. */
++static struct nvme_sqe *
++nvme_get_next_sqe(struct nvme_sq *sq, grub_uint8_t opc, void *metadata, void *data, void *data2)
++{
++ if (((sq->head + 1) & sq->common.mask) == sq->tail) {
++ grub_dprintf("nvme", "submission queue is full\n");
++ return NULL;
++ }
++
++ struct nvme_sqe *sqe = &sq->sqe[sq->tail];
++ grub_dprintf("nvme", "sq %p next_sqe %u\n", sq, sq->tail);
++
++ grub_memset(sqe, 0, sizeof(*sqe));
++ sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
++ sqe->mptr = (grub_uint32_t)metadata;
++ sqe->dptr_prp1 = (grub_uint32_t)data;
++ sqe->dptr_prp2 = (grub_uint32_t)data2;
++
++ return sqe;
++}
++
++/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */
++static void
++nvme_commit_sqe(struct nvme_sq *sq)
++{
++ grub_dprintf("nvme", "sq %p commit_sqe %u\n", sq, sq->tail);
++ sq->tail = (sq->tail + 1) & sq->common.mask;
++ *(volatile grub_uint32_t *) sq->common.dbl = sq->tail;
++}
++
++/* Perform an identify command on the admin queue and return the resulting
++ buffer. This may be a NULL pointer, if something failed. This function
++ cannot be used after initialization, because it uses buffers in tmp zone. */
++static union nvme_identify *
++nvme_admin_identify(struct nvme_ctrl *ctrl, grub_uint8_t cns, grub_uint32_t nsid)
++{
++ union nvme_identify *identify_buf = zalloc_page_aligned(4096);
++ if (!identify_buf)
++ return NULL;
++
++ struct nvme_sqe *cmd_identify;
++ cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
++ identify_buf, NULL);
++ if (!cmd_identify)
++ goto error;
++
++ cmd_identify->nsid = nsid;
++ cmd_identify->dword[10] = cns;
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ goto error;
++ }
++
++ return identify_buf;
++ error:
++ grub_free(identify_buf);
++ return NULL;
++}
++
++static struct nvme_identify_ctrl *
++nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
++{
++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
++}
++
++static struct nvme_identify_ns *
++nvme_admin_identify_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_id)
++{
++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
++ ns_id)->ns;
++}
++
++static void
++nvme_probe_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_idx, grub_uint8_t mdts)
++{
++ grub_uint32_t ns_id = ns_idx + 1;
++
++ struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
++ if (!id) {
++ grub_dprintf("nvme", "NVMe couldn't identify namespace %u.\n", ns_id);
++ goto free_buffer;
++ }
++
++ grub_uint8_t current_lba_format = id->flbas & 0xF;
++ if (current_lba_format > id->nlbaf) {
++ grub_dprintf("nvme", "NVMe NS %u: current LBA format %u is beyond what the "
++ " namespace supports (%u)?\n",
++ ns_id, current_lba_format, id->nlbaf + 1);
++ goto free_buffer;
++ }
++
++ if (!id->nsze) {
++ grub_dprintf("nvme", "NVMe NS %u is inactive.\n", ns_id);
++ goto free_buffer;
++ }
++
++ if (!nvme_dma_buffer) {
++ nvme_dma_buffer = zalloc_page_aligned(NVME_PAGE_SIZE);
++ if (!nvme_dma_buffer) {
++ goto free_buffer;
++ }
++ }
++
++ struct nvme_namespace *ns = grub_malloc(sizeof(*ns));
++ if (!ns) {
++ goto free_buffer;
++ }
++ grub_memset(ns, 0, sizeof(*ns));
++ ns->ctrl = ctrl;
++ ns->ns_id = ns_id;
++ ns->lba_count = id->nsze;
++
++ struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];
++
++ ns->block_size = 1U << fmt->lbads;
++ ns->metadata_size = fmt->ms;
++
++ if (ns->block_size > NVME_PAGE_SIZE) {
++ /* If we see devices that trigger this path, we need to increase our
++ buffer size. */
++ grub_free(ns);
++ goto free_buffer;
++ }
++
++ if (mdts) {
++ ns->max_req_size = ((1U << mdts) * NVME_PAGE_SIZE) / ns->block_size;
++ grub_dprintf("nvme", "NVME NS %u max request size: %d sectors\n",
++ ns_id, ns->max_req_size);
++ } else {
++ ns->max_req_size = -1U;
++ }
++
++ ns->devname = grub_xasprintf("nvme%un%u", ctrl->ctrlnum, ns_id);
++ ns->nsnum = grub_nvme_nscnt++;
++
++ grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns));
++
++free_buffer:
++ grub_free(id);
++}
++
++
++/* Release memory allocated for a completion queue */
++static void
++nvme_destroy_cq(struct nvme_cq *cq)
++{
++ grub_free(cq->cqe);
++ cq->cqe = NULL;
++}
++
++/* Release memory allocated for a submission queue */
++static void
++nvme_destroy_sq(struct nvme_sq *sq)
++{
++ grub_free(sq->sqe);
++ sq->sqe = NULL;
++}
++
++/* Returns 0 on success. */
++static int
++nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx)
++{
++ int rc;
++ struct nvme_sqe *cmd_create_cq;
++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);
++
++ rc = nvme_init_cq(ctrl, cq, q_idx, length);
++ if (rc) {
++ goto err;
++ }
++
++ cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
++ cq->cqe, NULL);
++ if (!cmd_create_cq) {
++ goto err_destroy_cq;
++ }
++
++ cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
++ cmd_create_cq->dword[11] = 1 /* physically contiguous */;
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "create io cq failed: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++
++ goto err_destroy_cq;
++ }
++
++ return 0;
++
++err_destroy_cq:
++ nvme_destroy_cq(cq);
++err:
++ return -1;
++}
++
++/* Returns 0 on success. */
++static int
++nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, struct nvme_cq *cq)
++{
++ int rc;
++ struct nvme_sqe *cmd_create_sq;
++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff);
++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);
++
++ rc = nvme_init_sq(ctrl, sq, q_idx, length, cq);
++ if (rc) {
++ goto err;
++ }
++
++ cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
++ NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
++ sq->sqe, NULL);
++ if (!cmd_create_sq) {
++ goto err_destroy_sq;
++ }
++
++ cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
++ cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */;
++ grub_dprintf("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq,
++ cmd_create_sq->dword[10], cmd_create_sq->dword[11]);
++
++ nvme_commit_sqe(&ctrl->admin_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "create io sq failed: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++ goto err_destroy_sq;
++ }
++
++ return 0;
++
++err_destroy_sq:
++ nvme_destroy_sq(sq);
++err:
++ return -1;
++}
++
++/* Reads count sectors into buf. The buffer cannot cross page boundaries. */
++static int
++nvme_io_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *prp1, void *prp2,
++ grub_uint16_t count, int write)
++{
++ if (((grub_uint32_t)prp1 & 0x3) || ((grub_uint32_t)prp2 & 0x3)) {
++ /* Buffer is misaligned */
++ return -1;
++ }
++
++ struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq,
++ write ? NVME_SQE_OPC_IO_WRITE
++ : NVME_SQE_OPC_IO_READ,
++ NULL, prp1, prp2);
++ io_read->nsid = ns->ns_id;
++ io_read->dword[10] = (grub_uint32_t)lba;
++ io_read->dword[11] = (grub_uint32_t)(lba >> 32);
++ io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1);
++
++ nvme_commit_sqe(&ns->ctrl->io_sq);
++
++ struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);
++
++ if (!nvme_is_cqe_success(&cqe)) {
++ grub_dprintf("nvme", "read io: %08x %08x %08x %08x\n",
++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
++
++ return -1;
++ }
++
++ grub_dprintf("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id, write ? "write" : "read",
++ lba, count);
++ return count;
++}
++
++// Transfer up to one page of data using the internal dma bounce buffer
++static int
++nvme_bounce_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count,
++ int write)
++{
++ grub_uint16_t const max_blocks = NVME_PAGE_SIZE / ns->block_size;
++ grub_uint16_t blocks = count < max_blocks ? count : max_blocks;
++
++ if (write)
++ grub_memcpy(nvme_dma_buffer, buf, blocks * ns->block_size);
++
++ int res = nvme_io_xfer(ns, lba, nvme_dma_buffer, NULL, blocks, write);
++
++ if (!write && res >= 0)
++ grub_memcpy(buf, nvme_dma_buffer, res * ns->block_size);
++
++ return res;
++}
++
++#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */
++
++// Transfer data using page list (if applicable)
++static int
++nvme_prpl_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count,
++ int write)
++{
++ grub_uint32_t base = (long)buf;
++ grub_int32_t size;
++
++ if (count > ns->max_req_size)
++ count = ns->max_req_size;
++
++ size = count * ns->block_size;
++ /* Special case for transfers that fit into PRP1, but are unaligned */
++ if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE))
++ goto single;
++
++ /* Every request has to be page aligned */
++ if (base & ~NVME_PAGE_MASK)
++ goto bounce;
++
++ /* Make sure a full block fits into the last chunk */
++ if (size & (ns->block_size - 1ULL))
++ goto bounce;
++
++ /* Build PRP list if we need to describe more than 2 pages */
++ if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) {
++ grub_uint32_t prpl_len = 0;
++ grub_uint64_t *prpl = nvme_dma_buffer;
++ int first_page = 1;
++ for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) {
++ if (first_page) {
++ /* First page is special */
++ first_page = 0;
++ continue;
++ }
++ if (prpl_len >= NVME_MAX_PRPL_ENTRIES)
++ goto bounce;
++ prpl[prpl_len++] = base;
++ }
++ return nvme_io_xfer(ns, lba, buf, prpl, count, write);
++ }
++
++ /* Directly embed the 2nd page if we only need 2 pages */
++ if ((ns->block_size * count) > NVME_PAGE_SIZE)
++ return nvme_io_xfer(ns, lba, buf, (char *) buf + NVME_PAGE_SIZE, count, write);
++
++single:
++ /* One page is enough, don't expose anything else */
++ return nvme_io_xfer(ns, lba, buf, NULL, count, write);
++
++bounce:
++ /* Use bounce buffer to make transfer */
++ return nvme_bounce_xfer(ns, lba, buf, count, write);
++}
++
++static int
++nvme_create_io_queues(struct nvme_ctrl *ctrl)
++{
++ if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
++ goto err;
++
++ if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
++ goto err_free_cq;
++
++ return 0;
++
++ err_free_cq:
++ nvme_destroy_cq(&ctrl->io_cq);
++ err:
++ return -1;
++}
++
++/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
++static int
++nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
++{
++ // grub_uint32_t const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
++ // grub_uint32_t to = timer_calc(max_to);
++ grub_uint32_t csts;
++
++ while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
++ // FIXME
++ //yield();
++
++ if (csts & NVME_CSTS_FATAL) {
++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n");
++ return -1;
++ }
++
++ /*
++ if (timer_check(to)) {
++ warn_timeout();
++ return -1;
++ }*/
++ }
++
++ return 0;
++}
++
++/* Returns 0 on success. */
++static int grub_nvme_controller_enable(struct nvme_ctrl *ctrl)
++{
++ grub_pci_address_t addr;
++ int rc;
++
++ addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND);
++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_BUS_MASTER);
++
++ /* Turn the controller off. */
++ ctrl->reg->cc = 0;
++ if (nvme_wait_csts_rdy(ctrl, 0)) {
++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n");
++ return -1;
++ }
++
++ ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);
++
++ rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
++ NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
++ if (rc) {
++ return -1;
++ }
++
++ rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
++ NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
++ if (rc) {
++ goto err_destroy_admin_cq;
++ }
++
++ ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
++ | ctrl->admin_sq.common.mask;
++
++ ctrl->reg->asq = (grub_uint32_t)ctrl->admin_sq.sqe;
++ ctrl->reg->acq = (grub_uint32_t)ctrl->admin_cq.cqe;
++
++ grub_dprintf("nvme", " admin submission queue: %p\n", ctrl->admin_sq.sqe);
++ grub_dprintf("nvme", " admin completion queue: %p\n", ctrl->admin_cq.cqe);
++
++ ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20)
++ | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);
++
++ if (nvme_wait_csts_rdy(ctrl, 1)) {
++ grub_dprintf("nvme", "NVMe fatal error while enabling controller\n");
++ goto err_destroy_admin_sq;
++ }
++
++ /* The admin queue is set up and the controller is ready. Let's figure out
++ what namespaces we have. */
++
++ struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);
++
++ if (!identify) {
++ grub_dprintf("nvme", "NVMe couldn't identify controller.\n");
++ goto err_destroy_admin_sq;
++ }
++
++ grub_dprintf("nvme", "NVMe has %u namespace%s.\n",
++ identify->nn, (identify->nn == 1) ? "" : "s");
++
++ ctrl->ns_count = identify->nn;
++ grub_uint8_t mdts = identify->mdts;
++ grub_free(identify);
++
++ if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
++ /* No point to continue, if the controller says it doesn't have
++ namespaces or we couldn't create I/O queues. */
++ goto err_destroy_admin_sq;
++ }
++
++ /* Give the controller a global number */
++ ctrl->ctrlnum = grub_nvme_ctrlcnt++;
++
++ /* Populate namespace IDs */
++ for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
++ nvme_probe_ns(ctrl, ns_idx, mdts);
++ }
++
++ grub_dprintf("nvme", "NVMe initialization complete!\n");
++ return 0;
++
++ err_destroy_admin_sq:
++ nvme_destroy_sq(&ctrl->admin_sq);
++ err_destroy_admin_cq:
++ nvme_destroy_cq(&ctrl->admin_cq);
++ return -1;
++}
++
++static int grub_nvme_pci_probe(grub_pci_device_t dev, grub_pci_id_t pciid __attribute__ ((unused)), void *data __attribute__ ((unused)))
++{
++ grub_pci_address_t addr;
++ grub_uint32_t class, bar, version;
++ struct nvme_reg volatile *reg;
++
++ class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS));
++ if (class >> 16 != 0x0108)
++ return 0;
++ if ((class >> 8 & 0xff) != 2) { /* as of NVM 1.0e */
++ grub_dprintf("nvme", "Found incompatble NVMe: prog-if=%02x\n", class >> 8 & 0xff);
++ return 0;
++ }
++
++ bar = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0));
++ reg = grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK, sizeof (*reg));
++
++ addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND);
++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_MEM_ENABLED);
++
++ version = reg->vs;
++ grub_dprintf("nvme", "Found NVMe controller with version %u.%u.%u.\n", version >> 16, (version >> 8) & 0xFF, version & 0xFF);
++ grub_dprintf("nvme", " Capabilities %016llx\n", reg->cap);
++
++ if (~reg->cap & NVME_CAP_CSS_NVME) {
++ grub_dprintf("nvme", "Controller doesn't speak NVMe command set. Skipping.\n");
++ goto err;
++ }
++
++ struct nvme_ctrl *ctrl = grub_malloc(sizeof(*ctrl));
++ if (!ctrl)
++ goto err;
++
++ grub_memset(ctrl, 0, sizeof(*ctrl));
++
++ ctrl->reg = reg;
++ ctrl->pci = dev;
++
++ if (grub_nvme_controller_enable(ctrl))
++ goto err_free_ctrl;
++
++ return 0;
++
++ err_free_ctrl:
++ grub_free(ctrl);
++ err:
++ grub_dprintf("nvme", "Failed to enable NVMe controller.\n");
++ return 0;
++}
++
++static int
++grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data, grub_disk_pull_t pull)
++{
++ struct nvme_namespace *ns;
++
++ if (pull != GRUB_DISK_PULL_NONE)
++ return 0;
++
++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces)
++ if (hook (ns->devname, hook_data))
++ return 1;
++
++ return 0;
++}
++
++static grub_err_t
++grub_nvme_open (const char *name __attribute ((unused)), grub_disk_t disk __attribute ((unused)))
++{
++ struct nvme_namespace *ns;
++
++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces)
++ if (grub_strcmp (ns->devname, name) == 0)
++ break;
++
++ if (! ns)
++ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
++
++ disk->total_sectors = ns->lba_count;
++ disk->max_agglomerate = ns->max_req_size;
++
++ disk->id = ns->nsnum; /* global id of the namespace */
++
++ disk->data = ns;
++
++ return 0;
++}
++
++static grub_err_t
++nvme_readwrite(struct nvme_namespace *ns, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf, int write)
++{
++ for (int i = 0; i < num_sectors;) {
++ grub_uint16_t blocks_remaining = num_sectors - i;
++ char *op_buf = buf + i * ns->block_size;
++ int blocks = nvme_prpl_xfer(ns, sector + i, op_buf, blocks_remaining, write);
++ if (blocks < 0)
++ return GRUB_ERR_IO;
++ i += blocks;
++ }
++ return GRUB_ERR_NONE;
++}
++
++static grub_err_t
++grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf)
++{
++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 0);
++}
++
++static grub_err_t
++grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, const char *buf)
++{
++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 1);
++}
++
++static struct grub_disk_dev grub_nvme_dev =
++ {
++ .name = "nvme",
++ .id = GRUB_DISK_DEVICE_NVME_ID,
++ .disk_iterate = grub_nvme_iterate,
++ .disk_open = grub_nvme_open,
++ .disk_read = grub_nvme_read,
++ .disk_write = grub_nvme_write,
++ .next = 0
++ };
++
++GRUB_MOD_INIT(nvme)
++{
++ grub_stop_disk_firmware ();
++ grub_pci_iterate (grub_nvme_pci_probe, NULL);
++ grub_disk_dev_register (&grub_nvme_dev);
++}
++
++GRUB_MOD_FINI(nvme)
++{
++ grub_disk_dev_unregister (&grub_nvme_dev);
++}
+diff --git a/include/grub/disk.h b/include/grub/disk.h
+index fbf23df7f..186e76f0b 100644
+--- a/include/grub/disk.h
++++ b/include/grub/disk.h
+@@ -52,6 +52,7 @@ enum grub_disk_dev_id
+ GRUB_DISK_DEVICE_UBOOTDISK_ID,
+ GRUB_DISK_DEVICE_XEN,
+ GRUB_DISK_DEVICE_OBDISK_ID,
++ GRUB_DISK_DEVICE_NVME_ID
+ };
+
+ struct grub_disk;
+--
+2.39.2
+